truthound-dashboard 1.2.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. truthound_dashboard/api/deps.py +28 -0
  2. truthound_dashboard/api/drift.py +1 -0
  3. truthound_dashboard/api/mask.py +164 -0
  4. truthound_dashboard/api/profile.py +11 -3
  5. truthound_dashboard/api/router.py +22 -0
  6. truthound_dashboard/api/scan.py +168 -0
  7. truthound_dashboard/api/schemas.py +13 -4
  8. truthound_dashboard/api/validations.py +33 -1
  9. truthound_dashboard/api/validators.py +85 -0
  10. truthound_dashboard/core/__init__.py +8 -0
  11. truthound_dashboard/core/phase5/activity.py +1 -1
  12. truthound_dashboard/core/services.py +457 -7
  13. truthound_dashboard/core/truthound_adapter.py +441 -26
  14. truthound_dashboard/db/__init__.py +6 -0
  15. truthound_dashboard/db/models.py +250 -1
  16. truthound_dashboard/schemas/__init__.py +52 -1
  17. truthound_dashboard/schemas/collaboration.py +1 -1
  18. truthound_dashboard/schemas/drift.py +118 -3
  19. truthound_dashboard/schemas/mask.py +209 -0
  20. truthound_dashboard/schemas/profile.py +45 -2
  21. truthound_dashboard/schemas/scan.py +312 -0
  22. truthound_dashboard/schemas/schema.py +30 -2
  23. truthound_dashboard/schemas/validation.py +60 -3
  24. truthound_dashboard/schemas/validators/__init__.py +59 -0
  25. truthound_dashboard/schemas/validators/aggregate_validators.py +238 -0
  26. truthound_dashboard/schemas/validators/anomaly_validators.py +723 -0
  27. truthound_dashboard/schemas/validators/base.py +263 -0
  28. truthound_dashboard/schemas/validators/completeness_validators.py +269 -0
  29. truthound_dashboard/schemas/validators/cross_table_validators.py +375 -0
  30. truthound_dashboard/schemas/validators/datetime_validators.py +253 -0
  31. truthound_dashboard/schemas/validators/distribution_validators.py +422 -0
  32. truthound_dashboard/schemas/validators/drift_validators.py +615 -0
  33. truthound_dashboard/schemas/validators/geospatial_validators.py +486 -0
  34. truthound_dashboard/schemas/validators/multi_column_validators.py +706 -0
  35. truthound_dashboard/schemas/validators/privacy_validators.py +531 -0
  36. truthound_dashboard/schemas/validators/query_validators.py +510 -0
  37. truthound_dashboard/schemas/validators/registry.py +318 -0
  38. truthound_dashboard/schemas/validators/schema_validators.py +408 -0
  39. truthound_dashboard/schemas/validators/string_validators.py +396 -0
  40. truthound_dashboard/schemas/validators/table_validators.py +412 -0
  41. truthound_dashboard/schemas/validators/uniqueness_validators.py +355 -0
  42. truthound_dashboard/schemas/validators.py +59 -0
  43. truthound_dashboard/static/assets/index-BCA8H1hO.js +574 -0
  44. truthound_dashboard/static/assets/index-BNsSQ2fN.css +1 -0
  45. truthound_dashboard/static/assets/logo--IpBiMPK.png +0 -0
  46. truthound_dashboard/static/assets/unmerged_dictionaries-CsJWCRx9.js +1 -0
  47. truthound_dashboard/static/favicon.ico +0 -0
  48. truthound_dashboard/static/index.html +3 -3
  49. {truthound_dashboard-1.2.0.dist-info → truthound_dashboard-1.3.0.dist-info}/METADATA +46 -11
  50. {truthound_dashboard-1.2.0.dist-info → truthound_dashboard-1.3.0.dist-info}/RECORD +53 -28
  51. truthound_dashboard/static/assets/index-BqJMyAHX.js +0 -110
  52. truthound_dashboard/static/assets/index-DMDxHCTs.js +0 -465
  53. truthound_dashboard/static/assets/index-Dm2D11TK.css +0 -1
  54. truthound_dashboard/static/mockServiceWorker.js +0 -349
  55. {truthound_dashboard-1.2.0.dist-info → truthound_dashboard-1.3.0.dist-info}/WHEEL +0 -0
  56. {truthound_dashboard-1.2.0.dist-info → truthound_dashboard-1.3.0.dist-info}/entry_points.txt +0 -0
  57. {truthound_dashboard-1.2.0.dist-info → truthound_dashboard-1.3.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,263 @@
1
+ """Base validator schema definitions.
2
+
3
+ This module defines the core types and enums used across all validator categories.
4
+ """
5
+
6
+ from __future__ import annotations
7
+
8
+ from enum import Enum
9
+ from typing import Any, Literal
10
+
11
+ from pydantic import BaseModel, Field
12
+
13
+
14
class ValidatorCategory(str, Enum):
    """Validator categories matching truthound's classification.

    Categories are organized by validation purpose:
    - Core validators: schema, completeness, uniqueness, distribution
    - Format validators: string, datetime
    - Statistical validators: aggregate, anomaly, drift
    - Relational validators: cross_table, multi_column, query
    - Domain validators: table, geospatial, privacy, business
    - Advanced validators: time_series, referential, streaming

    Members also subclass ``str``, so each member compares equal to its
    raw string value (e.g. ``ValidatorCategory.SCHEMA == "schema"``).
    """

    # Core validators (no extra dependencies)
    SCHEMA = "schema"
    COMPLETENESS = "completeness"
    UNIQUENESS = "uniqueness"
    DISTRIBUTION = "distribution"

    # Format validators
    STRING = "string"
    DATETIME = "datetime"

    # Statistical validators
    AGGREGATE = "aggregate"
    ANOMALY = "anomaly"  # requires: anomaly (scipy + sklearn)
    DRIFT = "drift"  # requires: drift (scipy)

    # Relational validators
    CROSS_TABLE = "cross_table"
    MULTI_COLUMN = "multi_column"
    QUERY = "query"

    # Domain validators
    TABLE = "table"
    GEOSPATIAL = "geospatial"
    PRIVACY = "privacy"
    BUSINESS = "business"

    # Advanced validators
    TIME_SERIES = "time_series"
    REFERENTIAL = "referential"
    STREAMING = "streaming"
56
+
57
+
58
class ParameterType(str, Enum):
    """Supported parameter types for validator configuration.

    Members also subclass ``str``, so each member compares equal to its
    raw string value.
    """

    STRING = "string"
    STRING_LIST = "string_list"
    INTEGER = "integer"
    FLOAT = "float"
    BOOLEAN = "boolean"
    SELECT = "select"  # Single selection from options
    MULTI_SELECT = "multi_select"  # Multiple selections
    COLUMN = "column"  # Column name from data source
    COLUMN_LIST = "column_list"  # Multiple column names
    SCHEMA = "schema"  # JSON/YAML schema definition
    EXPRESSION = "expression"  # Polars expression string
    REGEX = "regex"  # Regular expression pattern
    DATE = "date"  # Date value (YYYY-MM-DD)
    DATETIME = "datetime"  # Datetime value
    SOURCE_REF = "source_ref"  # Reference to another data source
76
+
77
+
78
class ParameterDefinition(BaseModel):
    """Definition of a validator parameter.

    Describes a single configurable input of a validator: its name and
    type, plus optional presentation hints (label, options, numeric
    bounds, placeholder, grouping, conditional display).
    """

    name: str = Field(..., description="Parameter name (matches truthound API)")
    label: str = Field(..., description="Display label for UI")
    type: ParameterType = Field(..., description="Parameter type")
    description: str = Field(default="", description="Help text for the parameter")
    required: bool = Field(default=False, description="Whether parameter is required")
    default: Any = Field(default=None, description="Default value if not specified")
    options: list[dict[str, str]] | None = Field(
        default=None,
        description="Options for select/multi_select types [{value, label}]",
    )
    min_value: float | None = Field(default=None, description="Minimum for numeric types")
    max_value: float | None = Field(default=None, description="Maximum for numeric types")
    placeholder: str | None = Field(default=None, description="Placeholder text")
    validation_pattern: str | None = Field(
        default=None, description="Regex pattern for validation"
    )
    # depends_on / depends_value together describe conditional display.
    depends_on: str | None = Field(
        default=None,
        description="Parameter name this depends on (for conditional display)",
    )
    depends_value: Any = Field(
        default=None,
        description="Value the dependency must have for this param to show",
    )
    group: str | None = Field(
        default=None,
        description="Parameter group for UI organization",
    )
109
+
110
+
111
class ValidatorDefinition(BaseModel):
    """Complete definition of a validator including its parameters.

    Bundles a validator's identity (name, display name, category), its
    configurable parameters, and metadata such as tags, default severity,
    optional-dependency requirement, and experimental/deprecated flags.
    """

    name: str = Field(..., description="Validator class name (e.g., 'ColumnExists')")
    display_name: str = Field(..., description="Human-readable name")
    category: ValidatorCategory = Field(..., description="Validator category")
    description: str = Field(..., description="What this validator checks")
    parameters: list[ParameterDefinition] = Field(
        default_factory=list, description="Configurable parameters"
    )
    tags: list[str] = Field(default_factory=list, description="Searchable tags")
    severity_default: Literal["low", "medium", "high", "critical"] = Field(
        default="medium", description="Default issue severity"
    )
    requires_extra: str | None = Field(
        default=None,
        description="Extra dependency required (e.g., 'drift', 'anomaly')",
    )
    experimental: bool = Field(
        default=False,
        description="Whether this validator is experimental",
    )
    deprecated: bool = Field(
        default=False,
        description="Whether this validator is deprecated",
    )
    deprecation_message: str | None = Field(
        default=None,
        description="Message explaining deprecation and alternatives",
    )
141
+
142
+
143
class ValidatorConfig(BaseModel):
    """Configuration for running a specific validator with parameters.

    Carries the validator name, an enabled flag, the parameter values to
    pass, and an optional severity that replaces the validator's default.
    """

    name: str = Field(..., description="Validator name")
    enabled: bool = Field(default=True, description="Whether to run this validator")
    params: dict[str, Any] = Field(
        default_factory=dict, description="Parameter values"
    )
    severity_override: Literal["low", "medium", "high", "critical"] | None = Field(
        default=None, description="Override default severity"
    )
154
+
155
+
156
class ValidatorConfigList(BaseModel):
    """List of validator configurations for a validation run.

    Defaults to an empty list when no validators are supplied.
    """

    validators: list[ValidatorConfig] = Field(
        default_factory=list, description="Configured validators"
    )
162
+
163
+
164
+ # ============================================================================
165
+ # Validator Config Conversion Utilities
166
+ # ============================================================================
167
+
168
+
169
def configs_to_truthound_format(
    configs: list[ValidatorConfig],
) -> tuple[list[str] | None, dict[str, dict[str, Any]]]:
    """Convert ValidatorConfig list to truthound-compatible format.

    truthound supports two ways of specifying validators:
    1. Simple list of validator names: validators=["Null", "Duplicate"]
    2. Dict-based configuration: validator_params={"Null": {"columns": ["a", "b"]}}

    This function converts our ValidatorConfig format to both formats,
    allowing the caller to choose based on whether custom params exist.

    Disabled configs are skipped entirely. If the same validator name
    appears more than once, it is listed once per occurrence in the names
    list and the last occurrence with non-empty params wins in the dict.

    Args:
        configs: List of ValidatorConfig objects from API request.

    Returns:
        Tuple of (validator_names, validator_params):
        - validator_names: List of enabled validator names, or None if empty
        - validator_params: Dict of {validator_name: {param: value}} for
          enabled validators that have at least one non-empty value
          (values that are None, "" or [] are dropped)

    Example:
        >>> configs = [
        ...     ValidatorConfig(name="Null", enabled=True, params={"columns": ["a"]}),
        ...     ValidatorConfig(name="Duplicate", enabled=True, params={}),
        ...     ValidatorConfig(name="Range", enabled=False, params={}),
        ... ]
        >>> names, params = configs_to_truthound_format(configs)
        >>> names
        ['Null', 'Duplicate']
        >>> params
        {'Null': {'columns': ['a']}}
    """
    enabled_names: list[str] = []
    validator_params: dict[str, dict[str, Any]] = {}

    for config in configs:
        if not config.enabled:
            continue

        enabled_names.append(config.name)

        if config.params:
            # Explicit comparisons rather than truthiness: meaningful falsy
            # values such as 0 and False must survive the filter.
            filtered_params = {
                key: value
                for key, value in config.params.items()
                if value is not None and value != "" and value != []
            }
            if filtered_params:
                validator_params[config.name] = filtered_params

    return enabled_names or None, validator_params
223
+
224
+
225
def has_custom_params(configs: list[ValidatorConfig]) -> bool:
    """Check if any enabled config carries at least one non-empty parameter.

    This helps determine whether to use simple or advanced mode when
    calling truthound.

    Args:
        configs: List of ValidatorConfig objects.

    Returns:
        True if at least one enabled config has a parameter value that is
        not None, not "" and not []; False otherwise.
    """
    # Explicit comparisons rather than truthiness: 0 and False count as
    # real parameter values.
    return any(
        value is not None and value != "" and value != []
        for config in configs
        if config.enabled and config.params
        for value in config.params.values()
    )
246
+
247
+
248
def merge_severity_overrides(
    configs: list[ValidatorConfig],
) -> dict[str, str]:
    """Extract severity overrides from configs.

    Only enabled configs whose ``severity_override`` is not None
    contribute. If a validator name appears more than once, the last
    qualifying entry wins.

    Args:
        configs: List of ValidatorConfig objects.

    Returns:
        Dict mapping validator names to their severity overrides.
    """
    return {
        config.name: config.severity_override
        for config in configs
        if config.enabled and config.severity_override is not None
    }
@@ -0,0 +1,269 @@
1
+ """Completeness validators.
2
+
3
+ Validators for detecting missing values, empty data, and placeholder values.
4
+ """
5
+
6
+ from .base import (
7
+ ParameterDefinition,
8
+ ParameterType,
9
+ ValidatorCategory,
10
+ ValidatorDefinition,
11
+ )
12
+
13
# Catalog of completeness validator definitions (null, empty, whitespace
# and placeholder detection, plus required-field and group rules).
COMPLETENESS_VALIDATORS: list[ValidatorDefinition] = [
    ValidatorDefinition(
        name="Null",
        display_name="Null Values",
        category=ValidatorCategory.COMPLETENESS,
        description="Detects and reports null values within specified columns.",
        parameters=[
            ParameterDefinition(
                name="columns",
                label="Columns",
                type=ParameterType.COLUMN_LIST,
                description="Target columns (leave empty for all columns)",
            ),
            ParameterDefinition(
                name="mostly",
                label="Mostly (Threshold)",
                type=ParameterType.FLOAT,
                description="Acceptable non-null ratio (0.0-1.0). E.g., 0.95 means 5% nulls allowed.",
                min_value=0,
                max_value=1,
                placeholder="0.95",
            ),
        ],
        tags=["completeness", "null", "missing"],
        severity_default="high",
    ),
    ValidatorDefinition(
        name="NotNull",
        display_name="Not Null",
        category=ValidatorCategory.COMPLETENESS,
        description="Ensures the specified column contains no null values.",
        parameters=[
            ParameterDefinition(
                name="column",
                label="Column",
                type=ParameterType.COLUMN,
                description="Column that must not contain nulls",
                required=True,
            ),
        ],
        tags=["completeness", "null", "required"],
        severity_default="critical",
    ),
    ValidatorDefinition(
        name="CompletenessRatio",
        display_name="Completeness Ratio",
        category=ValidatorCategory.COMPLETENESS,
        description="Validates that the completeness ratio meets a minimum threshold.",
        parameters=[
            ParameterDefinition(
                name="column",
                label="Column",
                type=ParameterType.COLUMN,
                required=True,
            ),
            ParameterDefinition(
                name="min_ratio",
                label="Minimum Ratio",
                type=ParameterType.FLOAT,
                description="Minimum completeness ratio (0.0-1.0)",
                required=True,
                min_value=0,
                max_value=1,
                default=0.95,
            ),
        ],
        tags=["completeness", "ratio", "threshold"],
        severity_default="medium",
    ),
    ValidatorDefinition(
        name="EmptyString",
        display_name="Empty String",
        category=ValidatorCategory.COMPLETENESS,
        description="Detects empty strings in string columns.",
        parameters=[
            ParameterDefinition(
                name="columns",
                label="Columns",
                type=ParameterType.COLUMN_LIST,
                description="Target string columns (leave empty for all string columns)",
            ),
        ],
        tags=["completeness", "empty", "string"],
        severity_default="medium",
    ),
    ValidatorDefinition(
        name="WhitespaceOnly",
        display_name="Whitespace Only",
        category=ValidatorCategory.COMPLETENESS,
        description="Identifies values containing only whitespace characters.",
        parameters=[
            ParameterDefinition(
                name="columns",
                label="Columns",
                type=ParameterType.COLUMN_LIST,
                description="Target columns to check",
            ),
        ],
        tags=["completeness", "whitespace", "string"],
        severity_default="low",
    ),
    ValidatorDefinition(
        name="ConditionalNull",
        display_name="Conditional Null",
        category=ValidatorCategory.COMPLETENESS,
        description="Validates null values based on conditional logic.",
        parameters=[
            ParameterDefinition(
                name="column",
                label="Column",
                type=ParameterType.COLUMN,
                description="Column to check for nulls",
                required=True,
            ),
            ParameterDefinition(
                name="condition",
                label="Condition Expression",
                type=ParameterType.EXPRESSION,
                description="Polars expression defining when column must not be null",
                required=True,
                placeholder='status == "active"',
            ),
        ],
        tags=["completeness", "conditional", "null"],
        severity_default="high",
    ),
    ValidatorDefinition(
        name="DefaultValue",
        display_name="Default Value Detection",
        category=ValidatorCategory.COMPLETENESS,
        description="Detects values matching default or placeholder patterns.",
        parameters=[
            ParameterDefinition(
                name="column",
                label="Column",
                type=ParameterType.COLUMN,
                required=True,
            ),
            ParameterDefinition(
                name="default_values",
                label="Default Values",
                type=ParameterType.STRING_LIST,
                description="Values considered as defaults/placeholders",
                required=True,
                placeholder="N/A, TBD, unknown, -1",
            ),
        ],
        tags=["completeness", "default", "placeholder"],
        severity_default="low",
    ),
    ValidatorDefinition(
        name="RequiredFields",
        display_name="Required Fields",
        category=ValidatorCategory.COMPLETENESS,
        description="Validates that all specified fields have non-null, non-empty values.",
        parameters=[
            ParameterDefinition(
                name="columns",
                label="Required Columns",
                type=ParameterType.COLUMN_LIST,
                description="Columns that must have values",
                required=True,
            ),
            ParameterDefinition(
                name="treat_empty_as_null",
                label="Treat Empty as Null",
                type=ParameterType.BOOLEAN,
                description="Consider empty strings as missing values",
                default=True,
            ),
        ],
        tags=["completeness", "required", "mandatory"],
        severity_default="critical",
    ),
    ValidatorDefinition(
        name="ConditionalRequired",
        display_name="Conditional Required",
        category=ValidatorCategory.COMPLETENESS,
        description="Requires a field when another field meets a condition.",
        parameters=[
            ParameterDefinition(
                name="column",
                label="Required Column",
                type=ParameterType.COLUMN,
                description="Column that must have a value when condition is met",
                required=True,
            ),
            ParameterDefinition(
                name="when_column",
                label="Condition Column",
                type=ParameterType.COLUMN,
                description="Column to check for the condition",
                required=True,
            ),
            ParameterDefinition(
                name="when_value",
                label="Condition Value",
                type=ParameterType.STRING,
                description="Value that triggers the requirement",
                required=True,
            ),
        ],
        tags=["completeness", "conditional", "required"],
        severity_default="high",
    ),
    ValidatorDefinition(
        name="MutualCompleteness",
        display_name="Mutual Completeness",
        category=ValidatorCategory.COMPLETENESS,
        description="Ensures columns are either all null or all non-null together.",
        parameters=[
            ParameterDefinition(
                name="columns",
                label="Columns",
                type=ParameterType.COLUMN_LIST,
                description="Columns that must have mutual completeness",
                required=True,
            ),
        ],
        tags=["completeness", "mutual", "coexistence"],
        severity_default="medium",
    ),
    ValidatorDefinition(
        name="AtLeastOneNotNull",
        display_name="At Least One Not Null",
        category=ValidatorCategory.COMPLETENESS,
        description="Ensures at least one column in a group has a non-null value.",
        parameters=[
            ParameterDefinition(
                name="columns",
                label="Columns",
                type=ParameterType.COLUMN_LIST,
                description="Column group where at least one must be non-null",
                required=True,
            ),
        ],
        tags=["completeness", "at_least_one", "group"],
        severity_default="medium",
    ),
    ValidatorDefinition(
        name="ExactlyOneNotNull",
        display_name="Exactly One Not Null",
        category=ValidatorCategory.COMPLETENESS,
        description="Ensures exactly one column in a group has a non-null value.",
        parameters=[
            ParameterDefinition(
                name="columns",
                label="Columns",
                type=ParameterType.COLUMN_LIST,
                description="Column group where exactly one must be non-null",
                required=True,
            ),
        ],
        tags=["completeness", "exactly_one", "mutex"],
        severity_default="medium",
    ),
]
+ ]