truthound-dashboard 1.2.1__py3-none-any.whl → 1.3.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- truthound_dashboard/api/deps.py +28 -0
- truthound_dashboard/api/drift.py +1 -0
- truthound_dashboard/api/mask.py +164 -0
- truthound_dashboard/api/profile.py +11 -3
- truthound_dashboard/api/router.py +22 -0
- truthound_dashboard/api/scan.py +168 -0
- truthound_dashboard/api/schemas.py +13 -4
- truthound_dashboard/api/validations.py +33 -1
- truthound_dashboard/api/validators.py +85 -0
- truthound_dashboard/core/__init__.py +8 -0
- truthound_dashboard/core/phase5/activity.py +1 -1
- truthound_dashboard/core/services.py +457 -7
- truthound_dashboard/core/truthound_adapter.py +441 -26
- truthound_dashboard/db/__init__.py +6 -0
- truthound_dashboard/db/models.py +250 -1
- truthound_dashboard/schemas/__init__.py +52 -1
- truthound_dashboard/schemas/collaboration.py +1 -1
- truthound_dashboard/schemas/drift.py +118 -3
- truthound_dashboard/schemas/mask.py +209 -0
- truthound_dashboard/schemas/profile.py +45 -2
- truthound_dashboard/schemas/scan.py +312 -0
- truthound_dashboard/schemas/schema.py +30 -2
- truthound_dashboard/schemas/validation.py +60 -3
- truthound_dashboard/schemas/validators/__init__.py +59 -0
- truthound_dashboard/schemas/validators/aggregate_validators.py +238 -0
- truthound_dashboard/schemas/validators/anomaly_validators.py +723 -0
- truthound_dashboard/schemas/validators/base.py +263 -0
- truthound_dashboard/schemas/validators/completeness_validators.py +269 -0
- truthound_dashboard/schemas/validators/cross_table_validators.py +375 -0
- truthound_dashboard/schemas/validators/datetime_validators.py +253 -0
- truthound_dashboard/schemas/validators/distribution_validators.py +422 -0
- truthound_dashboard/schemas/validators/drift_validators.py +615 -0
- truthound_dashboard/schemas/validators/geospatial_validators.py +486 -0
- truthound_dashboard/schemas/validators/multi_column_validators.py +706 -0
- truthound_dashboard/schemas/validators/privacy_validators.py +531 -0
- truthound_dashboard/schemas/validators/query_validators.py +510 -0
- truthound_dashboard/schemas/validators/registry.py +318 -0
- truthound_dashboard/schemas/validators/schema_validators.py +408 -0
- truthound_dashboard/schemas/validators/string_validators.py +396 -0
- truthound_dashboard/schemas/validators/table_validators.py +412 -0
- truthound_dashboard/schemas/validators/uniqueness_validators.py +355 -0
- truthound_dashboard/schemas/validators.py +59 -0
- truthound_dashboard/static/assets/index-BZG20KuF.js +586 -0
- truthound_dashboard/static/assets/index-D_HyZ3pb.css +1 -0
- truthound_dashboard/static/assets/unmerged_dictionaries-CtpqQBm0.js +1 -0
- truthound_dashboard/static/index.html +2 -2
- {truthound_dashboard-1.2.1.dist-info → truthound_dashboard-1.3.1.dist-info}/METADATA +50 -11
- {truthound_dashboard-1.2.1.dist-info → truthound_dashboard-1.3.1.dist-info}/RECORD +51 -27
- truthound_dashboard/static/assets/index-BqXVFyqj.js +0 -574
- truthound_dashboard/static/assets/index-o8qHVDte.css +0 -1
- truthound_dashboard/static/assets/unmerged_dictionaries-n_T3wZTf.js +0 -1
- {truthound_dashboard-1.2.1.dist-info → truthound_dashboard-1.3.1.dist-info}/WHEEL +0 -0
- {truthound_dashboard-1.2.1.dist-info → truthound_dashboard-1.3.1.dist-info}/entry_points.txt +0 -0
- {truthound_dashboard-1.2.1.dist-info → truthound_dashboard-1.3.1.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,408 @@
|
|
|
1
|
+
"""Schema validators.
|
|
2
|
+
|
|
3
|
+
Validators for structural integrity: column existence, types, ordering, relationships.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from .base import (
|
|
7
|
+
ParameterDefinition,
|
|
8
|
+
ParameterType,
|
|
9
|
+
ValidatorCategory,
|
|
10
|
+
ValidatorDefinition,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
# Declarative catalogue of the validators in the SCHEMA category.
#
# Each entry is a ValidatorDefinition carrying the validator's identifier
# (``name``), a human-readable ``display_name``, a ``description``, the list of
# parameters it accepts, free-form ``tags`` and the ``severity_default``
# ("low", "medium", "high" or "critical" are the values used here).
#
# NOTE(review): this module only declares metadata; how the definitions are
# consumed (registry lookup, form rendering, execution) is not visible from
# this file — confirm against the registry/adapter before changing names.
SCHEMA_VALIDATORS: list[ValidatorDefinition] = [
    # --- Column presence -----------------------------------------------------
    ValidatorDefinition(
        name="ColumnExists",
        display_name="Column Exists",
        category=ValidatorCategory.SCHEMA,
        description="Validates that specified columns exist in the dataset.",
        parameters=[
            ParameterDefinition(
                name="columns",
                label="Required Columns",
                type=ParameterType.COLUMN_LIST,
                description="Column names that must exist in the dataset",
                required=True,
            ),
        ],
        tags=["schema", "structure", "column"],
        severity_default="critical",
    ),
    ValidatorDefinition(
        name="ColumnNotExists",
        display_name="Column Not Exists",
        category=ValidatorCategory.SCHEMA,
        description="Ensures specified columns are absent (e.g., deprecated or sensitive fields).",
        parameters=[
            ParameterDefinition(
                name="columns",
                label="Forbidden Columns",
                # Free-form STRING_LIST rather than COLUMN_LIST — presumably
                # because the names are expected NOT to be present in the
                # dataset and so cannot be picked from it; verify with the UI.
                type=ParameterType.STRING_LIST,
                description="Column names that must NOT exist in the dataset",
                required=True,
                placeholder="e.g., password, ssn, credit_card",
            ),
        ],
        tags=["schema", "structure", "column", "security"],
        severity_default="high",
    ),
    # --- Shape (column / row counts) -----------------------------------------
    ValidatorDefinition(
        name="ColumnCount",
        display_name="Column Count",
        category=ValidatorCategory.SCHEMA,
        description="Validates the number of columns in the dataset.",
        parameters=[
            ParameterDefinition(
                name="expected",
                label="Expected Count",
                type=ParameterType.INTEGER,
                description="Exact expected column count (use min/max for range)",
                min_value=0,
            ),
            ParameterDefinition(
                name="min_count",
                label="Minimum Count",
                type=ParameterType.INTEGER,
                description="Minimum acceptable column count",
                min_value=0,
            ),
            ParameterDefinition(
                name="max_count",
                label="Maximum Count",
                type=ParameterType.INTEGER,
                description="Maximum acceptable column count",
                min_value=0,
            ),
        ],
        tags=["schema", "structure", "count"],
        severity_default="medium",
    ),
    ValidatorDefinition(
        name="RowCount",
        display_name="Row Count",
        category=ValidatorCategory.SCHEMA,
        description="Validates the number of rows in the dataset.",
        parameters=[
            ParameterDefinition(
                name="expected",
                label="Expected Count",
                type=ParameterType.INTEGER,
                description="Exact expected row count",
                min_value=0,
            ),
            ParameterDefinition(
                name="min_count",
                label="Minimum Count",
                type=ParameterType.INTEGER,
                description="Minimum acceptable row count",
                min_value=0,
            ),
            ParameterDefinition(
                name="max_count",
                label="Maximum Count",
                type=ParameterType.INTEGER,
                description="Maximum acceptable row count",
                min_value=0,
            ),
        ],
        tags=["schema", "structure", "count", "row"],
        severity_default="medium",
    ),
    # --- Types, ordering and full-schema checks ------------------------------
    ValidatorDefinition(
        name="ColumnType",
        display_name="Column Type",
        category=ValidatorCategory.SCHEMA,
        description="Validates that a column conforms to an expected data type.",
        parameters=[
            ParameterDefinition(
                name="column",
                label="Column",
                type=ParameterType.COLUMN,
                description="Target column to validate",
                required=True,
            ),
            ParameterDefinition(
                name="expected_type",
                label="Expected Type",
                type=ParameterType.SELECT,
                description="Expected Polars data type",
                required=True,
                # Option values are Polars dtype names; only the String entry
                # has a label that differs from its value.
                options=[
                    {"value": "Int8", "label": "Int8"},
                    {"value": "Int16", "label": "Int16"},
                    {"value": "Int32", "label": "Int32"},
                    {"value": "Int64", "label": "Int64"},
                    {"value": "UInt8", "label": "UInt8"},
                    {"value": "UInt16", "label": "UInt16"},
                    {"value": "UInt32", "label": "UInt32"},
                    {"value": "UInt64", "label": "UInt64"},
                    {"value": "Float32", "label": "Float32"},
                    {"value": "Float64", "label": "Float64"},
                    {"value": "Boolean", "label": "Boolean"},
                    {"value": "String", "label": "String (Utf8)"},
                    {"value": "Date", "label": "Date"},
                    {"value": "Datetime", "label": "Datetime"},
                    {"value": "Duration", "label": "Duration"},
                    {"value": "Time", "label": "Time"},
                    {"value": "Categorical", "label": "Categorical"},
                    {"value": "Object", "label": "Object"},
                ],
            ),
        ],
        tags=["schema", "type", "column"],
        severity_default="high",
    ),
    ValidatorDefinition(
        name="ColumnOrder",
        display_name="Column Order",
        category=ValidatorCategory.SCHEMA,
        description="Ensures columns appear in the specified order.",
        parameters=[
            ParameterDefinition(
                name="expected_order",
                label="Expected Order",
                type=ParameterType.COLUMN_LIST,
                description="Columns in their expected order",
                required=True,
            ),
            ParameterDefinition(
                name="strict",
                label="Strict Mode",
                type=ParameterType.BOOLEAN,
                description="If true, no additional columns are allowed",
                default=False,
            ),
        ],
        tags=["schema", "structure", "order"],
        severity_default="low",
    ),
    ValidatorDefinition(
        name="TableSchema",
        display_name="Table Schema",
        category=ValidatorCategory.SCHEMA,
        description="Validates the complete schema against a reference specification.",
        parameters=[
            ParameterDefinition(
                name="schema",
                label="Schema Definition",
                type=ParameterType.SCHEMA,
                description="Column name to type mapping (JSON format)",
                required=True,
                placeholder='{"id": "Int64", "name": "String", "email": "String"}',
            ),
            ParameterDefinition(
                name="strict",
                label="Strict Mode",
                type=ParameterType.BOOLEAN,
                description="Reject extra columns not in schema",
                default=False,
            ),
        ],
        tags=["schema", "structure", "complete"],
        severity_default="critical",
    ),
    # --- Relationships between columns / tables ------------------------------
    ValidatorDefinition(
        name="ColumnPair",
        display_name="Column Pair Relationship",
        category=ValidatorCategory.SCHEMA,
        description="Validates relationships between two columns.",
        parameters=[
            ParameterDefinition(
                name="column_a",
                label="First Column",
                type=ParameterType.COLUMN,
                description="First column in the relationship",
                required=True,
            ),
            ParameterDefinition(
                name="column_b",
                label="Second Column",
                type=ParameterType.COLUMN,
                description="Second column in the relationship",
                required=True,
            ),
            ParameterDefinition(
                name="relationship",
                label="Relationship Type",
                type=ParameterType.SELECT,
                description="Expected relationship between columns",
                required=True,
                options=[
                    {"value": "equal", "label": "Equal"},
                    {"value": "not_equal", "label": "Not Equal"},
                    {"value": "greater", "label": "A > B"},
                    {"value": "less", "label": "A < B"},
                    {"value": "greater_equal", "label": "A >= B"},
                    {"value": "less_equal", "label": "A <= B"},
                ],
            ),
        ],
        tags=["schema", "relationship", "comparison"],
        severity_default="medium",
    ),
    ValidatorDefinition(
        name="MultiColumnUnique",
        display_name="Multi-Column Unique",
        category=ValidatorCategory.SCHEMA,
        description="Ensures uniqueness across a combination of columns (composite key).",
        parameters=[
            ParameterDefinition(
                name="columns",
                label="Composite Key Columns",
                type=ParameterType.COLUMN_LIST,
                description="Columns that form the composite unique key",
                required=True,
            ),
        ],
        tags=["schema", "uniqueness", "composite", "key"],
        severity_default="critical",
    ),
    ValidatorDefinition(
        name="ReferentialIntegrity",
        display_name="Referential Integrity",
        category=ValidatorCategory.SCHEMA,
        description="Validates foreign key relationships between tables.",
        parameters=[
            ParameterDefinition(
                name="column",
                label="Foreign Key Column",
                type=ParameterType.COLUMN,
                description="Foreign key column in the current table",
                required=True,
            ),
            ParameterDefinition(
                name="reference_source_id",
                label="Reference Source",
                type=ParameterType.SOURCE_REF,
                description="ID of the source containing the reference table",
                required=True,
            ),
            ParameterDefinition(
                name="reference_column",
                label="Reference Column",
                # Plain STRING rather than COLUMN — presumably because the
                # column belongs to the referenced source, not the current
                # dataset; TODO confirm.
                type=ParameterType.STRING,
                description="Primary key column in the reference table",
                required=True,
            ),
        ],
        tags=["schema", "foreign_key", "relationship", "integrity"],
        severity_default="critical",
    ),
    # --- Arithmetic across columns -------------------------------------------
    ValidatorDefinition(
        name="MultiColumnSum",
        display_name="Multi-Column Sum",
        category=ValidatorCategory.SCHEMA,
        description="Validates that the sum of specified columns equals an expected value.",
        parameters=[
            ParameterDefinition(
                name="columns",
                label="Columns to Sum",
                type=ParameterType.COLUMN_LIST,
                description="Columns whose values should be summed",
                required=True,
            ),
            ParameterDefinition(
                name="expected_sum",
                label="Expected Sum",
                type=ParameterType.FLOAT,
                description="Expected sum value",
                required=True,
            ),
            ParameterDefinition(
                name="tolerance",
                label="Tolerance",
                type=ParameterType.FLOAT,
                description="Acceptable tolerance for floating point comparison",
                default=0.0001,
                min_value=0,
            ),
        ],
        tags=["schema", "arithmetic", "sum", "calculation"],
        severity_default="medium",
    ),
    ValidatorDefinition(
        name="MultiColumnCalculation",
        display_name="Multi-Column Calculation",
        category=ValidatorCategory.SCHEMA,
        description="Validates arbitrary arithmetic relationships between columns.",
        parameters=[
            ParameterDefinition(
                name="expression",
                label="Expression",
                type=ParameterType.EXPRESSION,
                description="Mathematical expression (e.g., 'price * quantity == total')",
                required=True,
                placeholder="price * quantity == total",
            ),
            ParameterDefinition(
                name="tolerance",
                label="Tolerance",
                type=ParameterType.FLOAT,
                description="Acceptable tolerance for comparison",
                default=0.0001,
                min_value=0,
            ),
        ],
        tags=["schema", "arithmetic", "expression", "calculation"],
        severity_default="medium",
    ),
    # --- Allowed / forbidden value pairs -------------------------------------
    ValidatorDefinition(
        name="ColumnPairInSet",
        display_name="Column Pair In Set",
        category=ValidatorCategory.SCHEMA,
        description="Validates that column value pairs exist within a predefined set.",
        parameters=[
            ParameterDefinition(
                name="column_a",
                label="First Column",
                type=ParameterType.COLUMN,
                # Description added for parity with the ColumnPair validator.
                description="First column of the pair",
                required=True,
            ),
            ParameterDefinition(
                name="column_b",
                label="Second Column",
                type=ParameterType.COLUMN,
                description="Second column of the pair",
                required=True,
            ),
            ParameterDefinition(
                name="valid_pairs",
                label="Valid Pairs",
                type=ParameterType.SCHEMA,
                description="JSON array of valid [a, b] pairs",
                required=True,
                placeholder='[["US", "USD"], ["UK", "GBP"], ["EU", "EUR"]]',
            ),
        ],
        tags=["schema", "pair", "set", "validation"],
        severity_default="medium",
    ),
    ValidatorDefinition(
        name="ColumnPairNotInSet",
        display_name="Column Pair Not In Set",
        category=ValidatorCategory.SCHEMA,
        description="Ensures column value pairs do not exist within a forbidden set.",
        parameters=[
            ParameterDefinition(
                name="column_a",
                label="First Column",
                type=ParameterType.COLUMN,
                # Description added for parity with the ColumnPair validator.
                description="First column of the pair",
                required=True,
            ),
            ParameterDefinition(
                name="column_b",
                label="Second Column",
                type=ParameterType.COLUMN,
                description="Second column of the pair",
                required=True,
            ),
            ParameterDefinition(
                name="forbidden_pairs",
                label="Forbidden Pairs",
                type=ParameterType.SCHEMA,
                description="JSON array of forbidden [a, b] pairs",
                required=True,
                # Placeholder added to mirror ``valid_pairs`` so the expected
                # JSON shape is shown for both pair-set validators.
                placeholder='[["US", "EUR"], ["UK", "USD"]]',
            ),
        ],
        tags=["schema", "pair", "set", "forbidden"],
        severity_default="high",
    ),
]
|