truthound-dashboard 1.2.1__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. truthound_dashboard/api/deps.py +28 -0
  2. truthound_dashboard/api/drift.py +1 -0
  3. truthound_dashboard/api/mask.py +164 -0
  4. truthound_dashboard/api/profile.py +11 -3
  5. truthound_dashboard/api/router.py +22 -0
  6. truthound_dashboard/api/scan.py +168 -0
  7. truthound_dashboard/api/schemas.py +13 -4
  8. truthound_dashboard/api/validations.py +33 -1
  9. truthound_dashboard/api/validators.py +85 -0
  10. truthound_dashboard/core/__init__.py +8 -0
  11. truthound_dashboard/core/phase5/activity.py +1 -1
  12. truthound_dashboard/core/services.py +457 -7
  13. truthound_dashboard/core/truthound_adapter.py +441 -26
  14. truthound_dashboard/db/__init__.py +6 -0
  15. truthound_dashboard/db/models.py +250 -1
  16. truthound_dashboard/schemas/__init__.py +52 -1
  17. truthound_dashboard/schemas/collaboration.py +1 -1
  18. truthound_dashboard/schemas/drift.py +118 -3
  19. truthound_dashboard/schemas/mask.py +209 -0
  20. truthound_dashboard/schemas/profile.py +45 -2
  21. truthound_dashboard/schemas/scan.py +312 -0
  22. truthound_dashboard/schemas/schema.py +30 -2
  23. truthound_dashboard/schemas/validation.py +60 -3
  24. truthound_dashboard/schemas/validators/__init__.py +59 -0
  25. truthound_dashboard/schemas/validators/aggregate_validators.py +238 -0
  26. truthound_dashboard/schemas/validators/anomaly_validators.py +723 -0
  27. truthound_dashboard/schemas/validators/base.py +263 -0
  28. truthound_dashboard/schemas/validators/completeness_validators.py +269 -0
  29. truthound_dashboard/schemas/validators/cross_table_validators.py +375 -0
  30. truthound_dashboard/schemas/validators/datetime_validators.py +253 -0
  31. truthound_dashboard/schemas/validators/distribution_validators.py +422 -0
  32. truthound_dashboard/schemas/validators/drift_validators.py +615 -0
  33. truthound_dashboard/schemas/validators/geospatial_validators.py +486 -0
  34. truthound_dashboard/schemas/validators/multi_column_validators.py +706 -0
  35. truthound_dashboard/schemas/validators/privacy_validators.py +531 -0
  36. truthound_dashboard/schemas/validators/query_validators.py +510 -0
  37. truthound_dashboard/schemas/validators/registry.py +318 -0
  38. truthound_dashboard/schemas/validators/schema_validators.py +408 -0
  39. truthound_dashboard/schemas/validators/string_validators.py +396 -0
  40. truthound_dashboard/schemas/validators/table_validators.py +412 -0
  41. truthound_dashboard/schemas/validators/uniqueness_validators.py +355 -0
  42. truthound_dashboard/schemas/validators.py +59 -0
  43. truthound_dashboard/static/assets/{index-BqXVFyqj.js → index-BCA8H1hO.js} +95 -95
  44. truthound_dashboard/static/assets/index-BNsSQ2fN.css +1 -0
  45. truthound_dashboard/static/assets/unmerged_dictionaries-CsJWCRx9.js +1 -0
  46. truthound_dashboard/static/index.html +2 -2
  47. {truthound_dashboard-1.2.1.dist-info → truthound_dashboard-1.3.0.dist-info}/METADATA +46 -11
  48. {truthound_dashboard-1.2.1.dist-info → truthound_dashboard-1.3.0.dist-info}/RECORD +51 -27
  49. truthound_dashboard/static/assets/index-o8qHVDte.css +0 -1
  50. truthound_dashboard/static/assets/unmerged_dictionaries-n_T3wZTf.js +0 -1
  51. {truthound_dashboard-1.2.1.dist-info → truthound_dashboard-1.3.0.dist-info}/WHEEL +0 -0
  52. {truthound_dashboard-1.2.1.dist-info → truthound_dashboard-1.3.0.dist-info}/entry_points.txt +0 -0
  53. {truthound_dashboard-1.2.1.dist-info → truthound_dashboard-1.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,723 @@
1
+ """Anomaly validators.
2
+
3
+ ML-based outlier and anomaly detection validators.
4
+ Requires: pip install truthound[anomaly] (scipy + scikit-learn)
5
+ """
6
+
7
+ from .base import (
8
+ ParameterDefinition,
9
+ ParameterType,
10
+ ValidatorCategory,
11
+ ValidatorDefinition,
12
+ )
13
+
14
# ---------------------------------------------------------------------------
# Private factories for parameter definitions that recur across validators.
#
# Each factory returns a *fresh* ParameterDefinition on every call so that no
# instance is shared between validators, and forwards only the keyword
# arguments the hand-written definitions used, so defaults declared on
# ParameterDefinition / ValidatorDefinition in ``.base`` are left untouched.
# ---------------------------------------------------------------------------


def _column_param() -> ParameterDefinition:
    """Return the required single-column selector used by univariate validators."""
    return ParameterDefinition(
        name="column",
        label="Column",
        type=ParameterType.COLUMN,
        required=True,
    )


def _columns_param(description: str = "Columns to analyze") -> ParameterDefinition:
    """Return the multi-column selector used by multivariate validators.

    Args:
        description: Help text shown for the selector.
    """
    return ParameterDefinition(
        name="columns",
        label="Columns",
        type=ParameterType.COLUMN_LIST,
        description=description,
    )


def _max_anomaly_ratio_param(default: float, **extra: object) -> ParameterDefinition:
    """Return the ``max_anomaly_ratio`` parameter (FLOAT constrained to 0..1).

    Args:
        default: Default ratio pre-filled for the validator.
        **extra: Additional ParameterDefinition keywords (e.g. ``description``)
            forwarded verbatim; omitted keywords keep the class defaults.
    """
    return ParameterDefinition(
        name="max_anomaly_ratio",
        label="Max Anomaly Ratio",
        type=ParameterType.FLOAT,
        default=default,
        min_value=0,
        max_value=1,
        **extra,
    )


def _contamination_param(
    description: str = "Expected proportion of outliers",
) -> ParameterDefinition:
    """Return the ``contamination`` parameter (FLOAT constrained to 0..0.5).

    Args:
        description: Help text shown for the field.
    """
    return ParameterDefinition(
        name="contamination",
        label="Contamination",
        type=ParameterType.FLOAT,
        description=description,
        default=0.1,
        min_value=0,
        max_value=0.5,
    )


def _distance_metric_param(metrics: list[str]) -> ParameterDefinition:
    """Return the ``metric`` SELECT parameter offering the given metrics.

    Args:
        metrics: Lower-case metric identifiers, in display order. Each option's
            label is the identifier with its first letter capitalised.
    """
    return ParameterDefinition(
        name="metric",
        label="Distance Metric",
        type=ParameterType.SELECT,
        options=[{"value": metric, "label": metric.capitalize()} for metric in metrics],
        default="euclidean",
    )


def _anomaly_validator(
    *,
    name: str,
    display_name: str,
    description: str,
    parameters: list[ParameterDefinition],
    tags: list[str],
    **extra: object,
) -> ValidatorDefinition:
    """Build a ValidatorDefinition carrying the settings common to this module.

    Every validator defined here is in the ANOMALY category, defaults to
    "medium" severity and requires the "anomaly" extra.

    Args:
        name: Validator identifier.
        display_name: Human-readable validator name.
        description: One-line description of the detection method.
        parameters: Parameter definitions, in display order.
        tags: Search/filter tags.
        **extra: Additional ValidatorDefinition keywords (e.g. ``experimental``)
            forwarded verbatim.
    """
    return ValidatorDefinition(
        name=name,
        display_name=display_name,
        category=ValidatorCategory.ANOMALY,
        description=description,
        parameters=parameters,
        tags=tags,
        severity_default="medium",
        requires_extra="anomaly",
        **extra,
    )


ANOMALY_VALIDATORS: list[ValidatorDefinition] = [
    # --- Univariate statistical detectors ---------------------------------
    _anomaly_validator(
        name="IQRAnomaly",
        display_name="IQR Anomaly",
        description="Detects outliers using Interquartile Range (IQR) method.",
        parameters=[
            _column_param(),
            ParameterDefinition(
                name="iqr_multiplier",
                label="IQR Multiplier",
                type=ParameterType.FLOAT,
                description="Multiplier for IQR (1.5 = standard, 3.0 = extreme)",
                default=1.5,
                min_value=0,
            ),
            _max_anomaly_ratio_param(
                0.05,
                description="Maximum acceptable anomaly ratio (0.0-1.0)",
            ),
            ParameterDefinition(
                name="detect_lower",
                label="Detect Lower Outliers",
                type=ParameterType.BOOLEAN,
                default=True,
            ),
            ParameterDefinition(
                name="detect_upper",
                label="Detect Upper Outliers",
                type=ParameterType.BOOLEAN,
                default=True,
            ),
        ],
        tags=["anomaly", "outlier", "iqr", "statistics"],
    ),
    _anomaly_validator(
        name="ZScoreAnomaly",
        display_name="Z-Score Anomaly",
        description="Detects outliers using Z-score methodology.",
        parameters=[
            _column_param(),
            ParameterDefinition(
                name="threshold",
                label="Z-Score Threshold",
                type=ParameterType.FLOAT,
                description="Z-score threshold (default: 3.0)",
                default=3.0,
                min_value=0,
            ),
            _max_anomaly_ratio_param(0.05),
        ],
        tags=["anomaly", "outlier", "zscore", "statistics"],
    ),
    _anomaly_validator(
        name="MADAnomaly",
        display_name="MAD Anomaly",
        description="Detects outliers using Median Absolute Deviation.",
        parameters=[
            _column_param(),
            ParameterDefinition(
                name="threshold",
                label="MAD Threshold",
                type=ParameterType.FLOAT,
                description="MAD threshold (default: 3.5)",
                default=3.5,
                min_value=0,
            ),
            _max_anomaly_ratio_param(0.05),
        ],
        tags=["anomaly", "outlier", "mad", "robust"],
    ),
    # --- Multivariate / model-based detectors -----------------------------
    _anomaly_validator(
        name="IsolationForest",
        display_name="Isolation Forest",
        description="Detects anomalies using Isolation Forest algorithm.",
        parameters=[
            _columns_param("Columns to analyze (leave empty for all numeric)"),
            # The field is a FLOAT bounded to 0..0.5, so the help text no
            # longer advertises the string value 'auto', which cannot be
            # entered through this parameter.
            _contamination_param("Expected proportion of outliers (0.0-0.5)"),
            ParameterDefinition(
                name="n_estimators",
                label="Number of Estimators",
                type=ParameterType.INTEGER,
                description="Number of base estimators in the ensemble",
                default=100,
                min_value=1,
            ),
            ParameterDefinition(
                name="max_samples",
                label="Max Samples",
                type=ParameterType.INTEGER,
                description="Number of samples to draw (leave empty for 256)",
            ),
            ParameterDefinition(
                name="random_state",
                label="Random State",
                type=ParameterType.INTEGER,
                description="Random seed for reproducibility",
            ),
            _max_anomaly_ratio_param(0.1),
        ],
        tags=["anomaly", "isolation_forest", "machine_learning", "multivariate"],
    ),
    _anomaly_validator(
        name="LOF",
        display_name="Local Outlier Factor",
        description="Detects anomalies using Local Outlier Factor algorithm.",
        parameters=[
            _columns_param(),
            ParameterDefinition(
                name="n_neighbors",
                label="Number of Neighbors",
                type=ParameterType.INTEGER,
                description="Number of neighbors to use",
                default=20,
                min_value=1,
            ),
            _contamination_param(),
            _distance_metric_param(["euclidean", "manhattan", "minkowski", "cosine"]),
            _max_anomaly_ratio_param(0.1),
        ],
        tags=["anomaly", "lof", "machine_learning", "density"],
    ),
    _anomaly_validator(
        name="DBSCAN",
        display_name="DBSCAN Anomaly",
        description="Detects anomalies using DBSCAN clustering (noise points).",
        parameters=[
            _columns_param(),
            ParameterDefinition(
                name="eps",
                label="Epsilon (eps)",
                type=ParameterType.FLOAT,
                description="Maximum distance between samples in a neighborhood",
                default=0.5,
                min_value=0,
            ),
            ParameterDefinition(
                name="min_samples",
                label="Min Samples",
                type=ParameterType.INTEGER,
                description="Minimum samples in a neighborhood for a core point",
                default=5,
                min_value=1,
            ),
            _distance_metric_param(["euclidean", "manhattan", "cosine"]),
            _max_anomaly_ratio_param(0.1),
        ],
        tags=["anomaly", "dbscan", "clustering", "density"],
    ),
    _anomaly_validator(
        name="Mahalanobis",
        display_name="Mahalanobis Distance",
        description="Detects multivariate outliers using Mahalanobis distance.",
        parameters=[
            _columns_param("Columns to analyze (leave empty for all numeric)"),
            ParameterDefinition(
                name="threshold_percentile",
                label="Threshold Percentile",
                type=ParameterType.FLOAT,
                description="Chi-squared percentile threshold (e.g., 97.5)",
                default=97.5,
                min_value=0,
                max_value=100,
            ),
            ParameterDefinition(
                name="use_robust_covariance",
                label="Use Robust Covariance",
                type=ParameterType.BOOLEAN,
                description="Use Minimum Covariance Determinant (more robust)",
                default=True,
            ),
            _max_anomaly_ratio_param(0.05),
        ],
        tags=["anomaly", "mahalanobis", "multivariate", "statistics"],
    ),
    _anomaly_validator(
        name="OneClassSVM",
        display_name="One-Class SVM",
        description="Detects anomalies using One-Class SVM.",
        parameters=[
            _columns_param(),
            ParameterDefinition(
                name="kernel",
                label="Kernel",
                type=ParameterType.SELECT,
                options=[
                    {"value": "rbf", "label": "RBF (Radial Basis Function)"},
                    {"value": "linear", "label": "Linear"},
                    {"value": "poly", "label": "Polynomial"},
                    {"value": "sigmoid", "label": "Sigmoid"},
                ],
                default="rbf",
            ),
            ParameterDefinition(
                name="nu",
                label="Nu",
                type=ParameterType.FLOAT,
                description="Upper bound on the fraction of margin errors (0.0-1.0)",
                default=0.1,
                min_value=0,
                max_value=1,
            ),
            ParameterDefinition(
                name="gamma",
                label="Gamma",
                type=ParameterType.SELECT,
                options=[
                    {"value": "scale", "label": "Scale (1 / (n_features * X.var()))"},
                    {"value": "auto", "label": "Auto (1 / n_features)"},
                ],
                default="scale",
            ),
            _max_anomaly_ratio_param(0.1),
        ],
        tags=["anomaly", "svm", "machine_learning"],
    ),
    _anomaly_validator(
        name="EllipticEnvelope",
        display_name="Elliptic Envelope",
        description="Detects outliers using robust Gaussian fitting.",
        parameters=[
            _columns_param(),
            _contamination_param(),
            _max_anomaly_ratio_param(0.1),
        ],
        tags=["anomaly", "elliptic", "gaussian", "covariance"],
    ),
    _anomaly_validator(
        name="PCAnomaly",
        display_name="PCA Anomaly",
        description="Detects anomalies using PCA reconstruction error.",
        parameters=[
            _columns_param(),
            ParameterDefinition(
                name="n_components",
                label="Number of Components",
                type=ParameterType.INTEGER,
                description="Number of principal components to use",
            ),
            ParameterDefinition(
                name="variance_ratio",
                label="Variance Ratio",
                type=ParameterType.FLOAT,
                description="Target explained variance ratio (0.0-1.0)",
                default=0.95,
                min_value=0,
                max_value=1,
            ),
            ParameterDefinition(
                name="error_percentile",
                label="Error Percentile",
                type=ParameterType.FLOAT,
                description="Reconstruction error percentile threshold",
                default=95,
                min_value=0,
                max_value=100,
            ),
            _max_anomaly_ratio_param(0.05),
        ],
        tags=["anomaly", "pca", "dimensionality", "reconstruction"],
    ),
    # --- Statistical tests and threshold rules ----------------------------
    _anomaly_validator(
        name="GrubbsTest",
        display_name="Grubbs Test",
        description="Detects outliers using Grubbs statistical test.",
        parameters=[
            _column_param(),
            ParameterDefinition(
                name="alpha",
                label="Significance Level",
                type=ParameterType.FLOAT,
                description="Significance level for the test",
                default=0.05,
                min_value=0,
                max_value=1,
            ),
            ParameterDefinition(
                name="max_iterations",
                label="Max Iterations",
                type=ParameterType.INTEGER,
                description="Maximum number of outliers to remove",
                default=10,
                min_value=1,
            ),
            _max_anomaly_ratio_param(0.05),
        ],
        tags=["anomaly", "grubbs", "statistical", "test"],
    ),
    _anomaly_validator(
        name="TukeyFences",
        display_name="Tukey Fences",
        description="Detects outliers using Tukey's inner and outer fences.",
        parameters=[
            _column_param(),
            ParameterDefinition(
                name="detect_mild",
                label="Detect Mild Outliers",
                type=ParameterType.BOOLEAN,
                description="Detect values outside inner fences (1.5 * IQR)",
                default=True,
            ),
            ParameterDefinition(
                name="detect_extreme",
                label="Detect Extreme Outliers",
                type=ParameterType.BOOLEAN,
                description="Detect values outside outer fences (3.0 * IQR)",
                default=True,
            ),
            _max_anomaly_ratio_param(0.05),
        ],
        tags=["anomaly", "tukey", "fences", "statistics"],
    ),
    _anomaly_validator(
        name="PercentileAnomaly",
        display_name="Percentile Anomaly",
        description="Detects outliers based on percentile thresholds.",
        parameters=[
            _column_param(),
            ParameterDefinition(
                name="lower_percentile",
                label="Lower Percentile",
                type=ParameterType.FLOAT,
                description="Lower percentile threshold",
                default=1.0,
                min_value=0,
                max_value=100,
            ),
            ParameterDefinition(
                name="upper_percentile",
                label="Upper Percentile",
                type=ParameterType.FLOAT,
                description="Upper percentile threshold",
                default=99.0,
                min_value=0,
                max_value=100,
            ),
            _max_anomaly_ratio_param(0.02),
        ],
        tags=["anomaly", "percentile", "threshold"],
    ),
    _anomaly_validator(
        name="ZScoreMultivariate",
        display_name="Multivariate Z-Score",
        description="Detects multivariate outliers using combined Z-scores.",
        parameters=[
            _columns_param(),
            ParameterDefinition(
                name="threshold",
                label="Z-Score Threshold",
                type=ParameterType.FLOAT,
                default=3.0,
                min_value=0,
            ),
            ParameterDefinition(
                name="method",
                label="Combination Method",
                type=ParameterType.SELECT,
                options=[
                    {"value": "any", "label": "Any (outlier in any column)"},
                    {"value": "all", "label": "All (outlier in all columns)"},
                    {"value": "mean", "label": "Mean (average Z-score)"},
                    {"value": "max", "label": "Max (maximum Z-score)"},
                ],
                default="any",
            ),
            _max_anomaly_ratio_param(0.05),
        ],
        tags=["anomaly", "zscore", "multivariate"],
    ),
    # --- Learned-representation detectors ---------------------------------
    _anomaly_validator(
        name="AutoEncoder",
        display_name="AutoEncoder Anomaly",
        description="Detects anomalies using neural network autoencoder reconstruction error.",
        parameters=[
            _columns_param(),
            ParameterDefinition(
                name="encoding_dim",
                label="Encoding Dimension",
                type=ParameterType.INTEGER,
                description="Dimensionality of the encoded representation",
            ),
            ParameterDefinition(
                name="epochs",
                label="Training Epochs",
                type=ParameterType.INTEGER,
                default=50,
                min_value=1,
            ),
            ParameterDefinition(
                name="error_percentile",
                label="Error Percentile",
                type=ParameterType.FLOAT,
                default=95,
                min_value=0,
                max_value=100,
            ),
            _max_anomaly_ratio_param(0.05),
        ],
        tags=["anomaly", "autoencoder", "deep_learning", "neural_network"],
        # The only validator in this module flagged as experimental.
        experimental=True,
    ),
    _anomaly_validator(
        name="KMeansAnomaly",
        display_name="K-Means Anomaly",
        description="Detects anomalies based on distance from K-Means cluster centers.",
        parameters=[
            _columns_param(),
            ParameterDefinition(
                name="n_clusters",
                label="Number of Clusters",
                type=ParameterType.INTEGER,
                default=5,
                min_value=2,
            ),
            ParameterDefinition(
                name="distance_percentile",
                label="Distance Percentile",
                type=ParameterType.FLOAT,
                description="Percentile threshold for anomaly detection",
                default=95,
                min_value=0,
                max_value=100,
            ),
            _max_anomaly_ratio_param(0.05),
        ],
        tags=["anomaly", "kmeans", "clustering"],
    ),
]