truthound-dashboard 1.4.4__py3-none-any.whl → 1.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (205)
  1. truthound_dashboard/api/alerts.py +75 -86
  2. truthound_dashboard/api/anomaly.py +7 -13
  3. truthound_dashboard/api/cross_alerts.py +38 -52
  4. truthound_dashboard/api/drift.py +49 -59
  5. truthound_dashboard/api/drift_monitor.py +234 -79
  6. truthound_dashboard/api/enterprise_sampling.py +498 -0
  7. truthound_dashboard/api/history.py +57 -5
  8. truthound_dashboard/api/lineage.py +3 -48
  9. truthound_dashboard/api/maintenance.py +104 -49
  10. truthound_dashboard/api/mask.py +1 -2
  11. truthound_dashboard/api/middleware.py +2 -1
  12. truthound_dashboard/api/model_monitoring.py +435 -311
  13. truthound_dashboard/api/notifications.py +227 -191
  14. truthound_dashboard/api/notifications_advanced.py +21 -20
  15. truthound_dashboard/api/observability.py +586 -0
  16. truthound_dashboard/api/plugins.py +2 -433
  17. truthound_dashboard/api/profile.py +199 -37
  18. truthound_dashboard/api/quality_reporter.py +701 -0
  19. truthound_dashboard/api/reports.py +7 -16
  20. truthound_dashboard/api/router.py +66 -0
  21. truthound_dashboard/api/rule_suggestions.py +5 -5
  22. truthound_dashboard/api/scan.py +17 -19
  23. truthound_dashboard/api/schedules.py +85 -50
  24. truthound_dashboard/api/schema_evolution.py +6 -6
  25. truthound_dashboard/api/schema_watcher.py +667 -0
  26. truthound_dashboard/api/sources.py +98 -27
  27. truthound_dashboard/api/tiering.py +1323 -0
  28. truthound_dashboard/api/triggers.py +14 -11
  29. truthound_dashboard/api/validations.py +12 -11
  30. truthound_dashboard/api/versioning.py +1 -6
  31. truthound_dashboard/core/__init__.py +129 -3
  32. truthound_dashboard/core/actions/__init__.py +62 -0
  33. truthound_dashboard/core/actions/custom.py +426 -0
  34. truthound_dashboard/core/actions/notifications.py +910 -0
  35. truthound_dashboard/core/actions/storage.py +472 -0
  36. truthound_dashboard/core/actions/webhook.py +281 -0
  37. truthound_dashboard/core/anomaly.py +262 -67
  38. truthound_dashboard/core/anomaly_explainer.py +4 -3
  39. truthound_dashboard/core/backends/__init__.py +67 -0
  40. truthound_dashboard/core/backends/base.py +299 -0
  41. truthound_dashboard/core/backends/errors.py +191 -0
  42. truthound_dashboard/core/backends/factory.py +423 -0
  43. truthound_dashboard/core/backends/mock_backend.py +451 -0
  44. truthound_dashboard/core/backends/truthound_backend.py +718 -0
  45. truthound_dashboard/core/checkpoint/__init__.py +87 -0
  46. truthound_dashboard/core/checkpoint/adapters.py +814 -0
  47. truthound_dashboard/core/checkpoint/checkpoint.py +491 -0
  48. truthound_dashboard/core/checkpoint/runner.py +270 -0
  49. truthound_dashboard/core/connections.py +645 -23
  50. truthound_dashboard/core/converters/__init__.py +14 -0
  51. truthound_dashboard/core/converters/truthound.py +620 -0
  52. truthound_dashboard/core/cross_alerts.py +540 -320
  53. truthound_dashboard/core/datasource_factory.py +1672 -0
  54. truthound_dashboard/core/drift_monitor.py +216 -20
  55. truthound_dashboard/core/enterprise_sampling.py +1291 -0
  56. truthound_dashboard/core/interfaces/__init__.py +225 -0
  57. truthound_dashboard/core/interfaces/actions.py +652 -0
  58. truthound_dashboard/core/interfaces/base.py +247 -0
  59. truthound_dashboard/core/interfaces/checkpoint.py +676 -0
  60. truthound_dashboard/core/interfaces/protocols.py +664 -0
  61. truthound_dashboard/core/interfaces/reporters.py +650 -0
  62. truthound_dashboard/core/interfaces/routing.py +646 -0
  63. truthound_dashboard/core/interfaces/triggers.py +619 -0
  64. truthound_dashboard/core/lineage.py +407 -71
  65. truthound_dashboard/core/model_monitoring.py +431 -3
  66. truthound_dashboard/core/notifications/base.py +4 -0
  67. truthound_dashboard/core/notifications/channels.py +501 -1203
  68. truthound_dashboard/core/notifications/deduplication/__init__.py +81 -115
  69. truthound_dashboard/core/notifications/deduplication/service.py +131 -348
  70. truthound_dashboard/core/notifications/dispatcher.py +202 -11
  71. truthound_dashboard/core/notifications/escalation/__init__.py +119 -106
  72. truthound_dashboard/core/notifications/escalation/engine.py +168 -358
  73. truthound_dashboard/core/notifications/routing/__init__.py +88 -128
  74. truthound_dashboard/core/notifications/routing/engine.py +90 -317
  75. truthound_dashboard/core/notifications/stats_aggregator.py +246 -1
  76. truthound_dashboard/core/notifications/throttling/__init__.py +67 -50
  77. truthound_dashboard/core/notifications/throttling/builder.py +117 -255
  78. truthound_dashboard/core/notifications/truthound_adapter.py +842 -0
  79. truthound_dashboard/core/phase5/collaboration.py +1 -1
  80. truthound_dashboard/core/plugins/lifecycle/__init__.py +0 -13
  81. truthound_dashboard/core/quality_reporter.py +1359 -0
  82. truthound_dashboard/core/report_history.py +0 -6
  83. truthound_dashboard/core/reporters/__init__.py +175 -14
  84. truthound_dashboard/core/reporters/adapters.py +943 -0
  85. truthound_dashboard/core/reporters/base.py +0 -3
  86. truthound_dashboard/core/reporters/builtin/__init__.py +18 -0
  87. truthound_dashboard/core/reporters/builtin/csv_reporter.py +111 -0
  88. truthound_dashboard/core/reporters/builtin/html_reporter.py +270 -0
  89. truthound_dashboard/core/reporters/builtin/json_reporter.py +127 -0
  90. truthound_dashboard/core/reporters/compat.py +266 -0
  91. truthound_dashboard/core/reporters/csv_reporter.py +2 -35
  92. truthound_dashboard/core/reporters/factory.py +526 -0
  93. truthound_dashboard/core/reporters/interfaces.py +745 -0
  94. truthound_dashboard/core/reporters/registry.py +1 -10
  95. truthound_dashboard/core/scheduler.py +165 -0
  96. truthound_dashboard/core/schema_evolution.py +3 -3
  97. truthound_dashboard/core/schema_watcher.py +1528 -0
  98. truthound_dashboard/core/services.py +595 -76
  99. truthound_dashboard/core/store_manager.py +810 -0
  100. truthound_dashboard/core/streaming_anomaly.py +169 -4
  101. truthound_dashboard/core/tiering.py +1309 -0
  102. truthound_dashboard/core/triggers/evaluators.py +178 -8
  103. truthound_dashboard/core/truthound_adapter.py +2620 -197
  104. truthound_dashboard/core/unified_alerts.py +23 -20
  105. truthound_dashboard/db/__init__.py +8 -0
  106. truthound_dashboard/db/database.py +8 -2
  107. truthound_dashboard/db/models.py +944 -25
  108. truthound_dashboard/db/repository.py +2 -0
  109. truthound_dashboard/main.py +15 -0
  110. truthound_dashboard/schemas/__init__.py +177 -16
  111. truthound_dashboard/schemas/base.py +44 -23
  112. truthound_dashboard/schemas/collaboration.py +19 -6
  113. truthound_dashboard/schemas/cross_alerts.py +19 -3
  114. truthound_dashboard/schemas/drift.py +61 -55
  115. truthound_dashboard/schemas/drift_monitor.py +67 -23
  116. truthound_dashboard/schemas/enterprise_sampling.py +653 -0
  117. truthound_dashboard/schemas/lineage.py +0 -33
  118. truthound_dashboard/schemas/mask.py +10 -8
  119. truthound_dashboard/schemas/model_monitoring.py +89 -10
  120. truthound_dashboard/schemas/notifications_advanced.py +13 -0
  121. truthound_dashboard/schemas/observability.py +453 -0
  122. truthound_dashboard/schemas/plugins.py +0 -280
  123. truthound_dashboard/schemas/profile.py +154 -247
  124. truthound_dashboard/schemas/quality_reporter.py +403 -0
  125. truthound_dashboard/schemas/reports.py +2 -2
  126. truthound_dashboard/schemas/rule_suggestion.py +8 -1
  127. truthound_dashboard/schemas/scan.py +4 -24
  128. truthound_dashboard/schemas/schedule.py +11 -3
  129. truthound_dashboard/schemas/schema_watcher.py +727 -0
  130. truthound_dashboard/schemas/source.py +17 -2
  131. truthound_dashboard/schemas/tiering.py +822 -0
  132. truthound_dashboard/schemas/triggers.py +16 -0
  133. truthound_dashboard/schemas/unified_alerts.py +7 -0
  134. truthound_dashboard/schemas/validation.py +0 -13
  135. truthound_dashboard/schemas/validators/base.py +41 -21
  136. truthound_dashboard/schemas/validators/business_rule_validators.py +244 -0
  137. truthound_dashboard/schemas/validators/localization_validators.py +273 -0
  138. truthound_dashboard/schemas/validators/ml_feature_validators.py +308 -0
  139. truthound_dashboard/schemas/validators/profiling_validators.py +275 -0
  140. truthound_dashboard/schemas/validators/referential_validators.py +312 -0
  141. truthound_dashboard/schemas/validators/registry.py +93 -8
  142. truthound_dashboard/schemas/validators/timeseries_validators.py +389 -0
  143. truthound_dashboard/schemas/versioning.py +1 -6
  144. truthound_dashboard/static/index.html +2 -2
  145. truthound_dashboard-1.5.1.dist-info/METADATA +312 -0
  146. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/RECORD +149 -148
  147. truthound_dashboard/core/plugins/hooks/__init__.py +0 -63
  148. truthound_dashboard/core/plugins/hooks/decorators.py +0 -367
  149. truthound_dashboard/core/plugins/hooks/manager.py +0 -403
  150. truthound_dashboard/core/plugins/hooks/protocols.py +0 -265
  151. truthound_dashboard/core/plugins/lifecycle/hot_reload.py +0 -584
  152. truthound_dashboard/core/reporters/junit_reporter.py +0 -233
  153. truthound_dashboard/core/reporters/markdown_reporter.py +0 -207
  154. truthound_dashboard/core/reporters/pdf_reporter.py +0 -209
  155. truthound_dashboard/static/assets/_baseUniq-BcrSP13d.js +0 -1
  156. truthound_dashboard/static/assets/arc-DlYjKwIL.js +0 -1
  157. truthound_dashboard/static/assets/architectureDiagram-VXUJARFQ-Bb2drbQM.js +0 -36
  158. truthound_dashboard/static/assets/blockDiagram-VD42YOAC-BlsPG1CH.js +0 -122
  159. truthound_dashboard/static/assets/c4Diagram-YG6GDRKO-B9JdUoaC.js +0 -10
  160. truthound_dashboard/static/assets/channel-Q6mHF1Hd.js +0 -1
  161. truthound_dashboard/static/assets/chunk-4BX2VUAB-DmyoPVuJ.js +0 -1
  162. truthound_dashboard/static/assets/chunk-55IACEB6-Bcz6Siv8.js +0 -1
  163. truthound_dashboard/static/assets/chunk-B4BG7PRW-Br3G5Rum.js +0 -165
  164. truthound_dashboard/static/assets/chunk-DI55MBZ5-DuM9c23u.js +0 -220
  165. truthound_dashboard/static/assets/chunk-FMBD7UC4-DNU-5mvT.js +0 -15
  166. truthound_dashboard/static/assets/chunk-QN33PNHL-Im2yNcmS.js +0 -1
  167. truthound_dashboard/static/assets/chunk-QZHKN3VN-kZr8XFm1.js +0 -1
  168. truthound_dashboard/static/assets/chunk-TZMSLE5B-Q__360q_.js +0 -1
  169. truthound_dashboard/static/assets/classDiagram-2ON5EDUG-vtixxUyK.js +0 -1
  170. truthound_dashboard/static/assets/classDiagram-v2-WZHVMYZB-vtixxUyK.js +0 -1
  171. truthound_dashboard/static/assets/clone-BOt2LwD0.js +0 -1
  172. truthound_dashboard/static/assets/cose-bilkent-S5V4N54A-CBDw6iac.js +0 -1
  173. truthound_dashboard/static/assets/dagre-6UL2VRFP-XdKqmmY9.js +0 -4
  174. truthound_dashboard/static/assets/diagram-PSM6KHXK-DAZ8nx9V.js +0 -24
  175. truthound_dashboard/static/assets/diagram-QEK2KX5R-BRvDTbGD.js +0 -43
  176. truthound_dashboard/static/assets/diagram-S2PKOQOG-bQcczUkl.js +0 -24
  177. truthound_dashboard/static/assets/erDiagram-Q2GNP2WA-DPje7VMN.js +0 -60
  178. truthound_dashboard/static/assets/flowDiagram-NV44I4VS-B7BVtFVS.js +0 -162
  179. truthound_dashboard/static/assets/ganttDiagram-JELNMOA3-D6WKSS7U.js +0 -267
  180. truthound_dashboard/static/assets/gitGraphDiagram-NY62KEGX-D3vtVd3y.js +0 -65
  181. truthound_dashboard/static/assets/graph-BKgNKZVp.js +0 -1
  182. truthound_dashboard/static/assets/index-C6JSrkHo.css +0 -1
  183. truthound_dashboard/static/assets/index-DkU82VsU.js +0 -1800
  184. truthound_dashboard/static/assets/infoDiagram-WHAUD3N6-DnNCT429.js +0 -2
  185. truthound_dashboard/static/assets/journeyDiagram-XKPGCS4Q-DGiMozqS.js +0 -139
  186. truthound_dashboard/static/assets/kanban-definition-3W4ZIXB7-BV2gUgli.js +0 -89
  187. truthound_dashboard/static/assets/katex-Cu_Erd72.js +0 -261
  188. truthound_dashboard/static/assets/layout-DI2MfQ5G.js +0 -1
  189. truthound_dashboard/static/assets/min-DYdgXVcT.js +0 -1
  190. truthound_dashboard/static/assets/mindmap-definition-VGOIOE7T-C7x4ruxz.js +0 -68
  191. truthound_dashboard/static/assets/pieDiagram-ADFJNKIX-CAJaAB9f.js +0 -30
  192. truthound_dashboard/static/assets/quadrantDiagram-AYHSOK5B-DeqwDI46.js +0 -7
  193. truthound_dashboard/static/assets/requirementDiagram-UZGBJVZJ-e3XDpZIM.js +0 -64
  194. truthound_dashboard/static/assets/sankeyDiagram-TZEHDZUN-CNnAv5Ux.js +0 -10
  195. truthound_dashboard/static/assets/sequenceDiagram-WL72ISMW-Dsne-Of3.js +0 -145
  196. truthound_dashboard/static/assets/stateDiagram-FKZM4ZOC-Ee0sQXyb.js +0 -1
  197. truthound_dashboard/static/assets/stateDiagram-v2-4FDKWEC3-B26KqW_W.js +0 -1
  198. truthound_dashboard/static/assets/timeline-definition-IT6M3QCI-DZYi2yl3.js +0 -61
  199. truthound_dashboard/static/assets/treemap-KMMF4GRG-CY3f8In2.js +0 -128
  200. truthound_dashboard/static/assets/unmerged_dictionaries-Dd7xcPWG.js +0 -1
  201. truthound_dashboard/static/assets/xychartDiagram-PRI3JC2R-CS7fydZZ.js +0 -7
  202. truthound_dashboard-1.4.4.dist-info/METADATA +0 -507
  203. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/WHEEL +0 -0
  204. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/entry_points.txt +0 -0
  205. {truthound_dashboard-1.4.4.dist-info → truthound_dashboard-1.5.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,664 @@
1
+ """Protocol definitions for data quality operations.
2
+
3
+ This module defines the interfaces (protocols) that abstract away the
4
+ specific data quality library implementation (e.g., truthound).
5
+
6
+ Using protocols allows:
7
+ - Runtime duck typing (any object with matching methods works)
8
+ - Static type checking with mypy
9
+ - Easy mocking for tests
10
+ - Future backend swapping without code changes
11
+
12
+ Example:
13
+ class MyCustomBackend(IDataQualityBackend):
14
+ async def check(self, data, **kwargs) -> ICheckResult:
15
+ # Custom implementation
16
+ pass
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ from enum import Enum, auto
22
+ from typing import Any, Protocol, Union, runtime_checkable
23
+
24
# Type alias for data input: either a path string or a DataSource-like object.
# NOTE: because one member of the union is ``Any``, static type checkers accept
# any value here; the alias documents intent rather than enforcing it.
DataInput = Union[str, Any]
26
+
27
+
28
+ # =============================================================================
29
+ # Data Source Capabilities
30
+ # =============================================================================
31
+
32
+
33
class DataSourceCapability(Enum):
    """Capabilities that a data source may support.

    This enum mirrors truthound's DataSourceCapability for loose coupling.
    Data sources declare their capabilities to enable optimizations.

    Member values are produced by ``auto()``, i.e. consecutive integers in
    declaration order. Compare members by identity/name rather than by value.
    """

    LAZY_EVALUATION = auto()  # Supports lazy/deferred execution
    SQL_PUSHDOWN = auto()  # Can push operations to database
    SAMPLING = auto()  # Supports data sampling
    STREAMING = auto()  # Supports streaming processing
    SCHEMA_INFERENCE = auto()  # Can infer schema automatically
    ROW_COUNT = auto()  # Can efficiently count rows
    CONNECTION_TEST = auto()  # Supports connection testing
46
+
47
+
48
+ # =============================================================================
49
+ # Data Source Configuration Protocol
50
+ # =============================================================================
51
+
52
+
53
@runtime_checkable
class IDataSourceConfig(Protocol):
    """Protocol for data source configuration objects.

    This abstracts away the specific configuration implementation
    to allow different backends to use their own config classes.

    The protocol is ``runtime_checkable``: ``isinstance()`` only verifies
    that the listed members exist, not their types.
    """

    @property
    def name(self) -> str | None:
        """Source name, or ``None`` when no name is set."""
        ...

    @property
    def max_rows(self) -> int | None:
        """Maximum-rows limit, or ``None`` when no limit is set."""
        ...

    @property
    def sample_size(self) -> int | None:
        """Default sample size, or ``None`` when none is configured."""
        ...
75
+
76
+
77
+ # =============================================================================
78
+ # Data Source Protocol
79
+ # =============================================================================
80
+
81
+
82
@runtime_checkable
class IDataSource(Protocol):
    """Protocol for data source objects.

    Any object that provides access to tabular data should implement
    this interface. This abstracts away the specific DataSource
    implementation from truthound or other libraries.

    The protocol is ``runtime_checkable``: ``isinstance()`` only verifies
    that the listed members exist, not their types or signatures.
    """

    @property
    def name(self) -> str:
        """Name of the data source."""
        ...

    @property
    def columns(self) -> list[str]:
        """Column names of the data source."""
        ...

    @property
    def row_count(self) -> int | None:
        """Row count, or ``None`` if it is not available."""
        ...

    @property
    def capabilities(self) -> set[DataSourceCapability]:
        """Capabilities of this data source.

        Returns:
            Set of capabilities this source supports.
        """
        ...

    def to_polars_lazyframe(self) -> Any:
        """Convert to a Polars LazyFrame for processing.

        The return type is ``Any`` so this module does not need to import
        Polars itself.

        Returns:
            Polars LazyFrame representation of the data.
        """
        ...
122
+
123
+
124
+ # =============================================================================
125
+ # Validation Issue Protocol
126
+ # =============================================================================
127
+
128
+
129
@runtime_checkable
class IValidationIssue(Protocol):
    """Protocol for validation issue objects.

    Represents a single data quality issue found during validation.

    The protocol is ``runtime_checkable``: ``isinstance()`` only verifies
    that the listed members exist, not their types.
    """

    @property
    def column(self) -> str:
        """Name of the column where the issue was found."""
        ...

    @property
    def issue_type(self) -> str:
        """Type of issue (e.g., 'null_values', 'out_of_range')."""
        ...

    @property
    def count(self) -> int:
        """Number of rows affected."""
        ...

    @property
    def severity(self) -> Any:
        """Issue severity (may be an enum or a string)."""
        ...

    @property
    def details(self) -> str | None:
        """Human-readable description, or ``None`` if there is none."""
        ...
160
+
161
+
162
+ # =============================================================================
163
+ # Result Protocols
164
+ # =============================================================================
165
+
166
+
167
@runtime_checkable
class ICheckResult(Protocol):
    """Protocol for validation check results.

    Contains the results of running data validation.

    The protocol is ``runtime_checkable``: ``isinstance()`` only verifies
    that the listed members exist, not their types or signatures.
    """

    @property
    def issues(self) -> list[Any]:
        """Validation issues found."""
        ...

    @property
    def passed(self) -> bool:
        """Whether validation passed (no issues)."""
        ...

    @property
    def has_critical(self) -> bool:
        """Whether critical issues were found."""
        ...

    @property
    def has_high(self) -> bool:
        """Whether high severity issues were found."""
        ...

    @property
    def row_count(self) -> int:
        """Number of rows validated."""
        ...

    @property
    def column_count(self) -> int:
        """Number of columns."""
        ...

    @property
    def source(self) -> str:
        """Data source name or path."""
        ...

    def to_dict(self) -> dict[str, Any]:
        """Convert the result to a dictionary."""
        ...
212
+
213
+
214
@runtime_checkable
class ILearnResult(Protocol):
    """Protocol for schema learning results.

    The protocol is ``runtime_checkable``: ``isinstance()`` only verifies
    that the listed members exist, not their types or signatures.
    """

    @property
    def schema(self) -> dict[str, Any]:
        """Learned schema as a dictionary."""
        ...

    @property
    def schema_yaml(self) -> str:
        """Learned schema as a YAML string."""
        ...

    @property
    def row_count(self) -> int | None:
        """Number of rows analyzed; the annotation allows ``None``."""
        ...

    @property
    def column_count(self) -> int:
        """Number of columns."""
        ...

    @property
    def columns(self) -> list[str]:
        """Column names."""
        ...

    def to_dict(self) -> dict[str, Any]:
        """Convert the result to a dictionary."""
        ...
246
+
247
+
248
@runtime_checkable
class IColumnProfile(Protocol):
    """Protocol for column-level profiling results.

    The protocol is ``runtime_checkable``: ``isinstance()`` only verifies
    that the listed members exist, not their types or signatures.
    """

    @property
    def name(self) -> str:
        """Column name."""
        ...

    @property
    def physical_type(self) -> str:
        """Physical data type."""
        ...

    @property
    def inferred_type(self) -> str:
        """Inferred logical type."""
        ...

    @property
    def null_count(self) -> int:
        """Number of null values."""
        ...

    @property
    def null_ratio(self) -> float:
        """Ratio of null values."""
        ...

    @property
    def distinct_count(self) -> int:
        """Number of distinct values."""
        ...

    @property
    def unique_ratio(self) -> float:
        """Ratio of unique values."""
        ...

    def to_dict(self) -> dict[str, Any]:
        """Convert the column profile to a dictionary."""
        ...
290
+
291
+
292
@runtime_checkable
class IProfileResult(Protocol):
    """Protocol for data profiling results.

    The protocol is ``runtime_checkable``: ``isinstance()`` only verifies
    that the listed members exist, not their types or signatures.
    """

    @property
    def name(self) -> str:
        """Table/source name."""
        ...

    @property
    def source(self) -> str:
        """Data source path or name."""
        ...

    @property
    def row_count(self) -> int:
        """Number of rows."""
        ...

    @property
    def column_count(self) -> int:
        """Number of columns."""
        ...

    @property
    def estimated_memory_bytes(self) -> int:
        """Estimated memory usage."""
        ...

    @property
    def columns(self) -> list[Any]:
        """Column profile results."""
        ...

    def to_dict(self) -> dict[str, Any]:
        """Convert the profile to a dictionary."""
        ...
329
+
330
+
331
@runtime_checkable
class ICompareResult(Protocol):
    """Protocol for drift comparison results.

    The protocol is ``runtime_checkable``: ``isinstance()`` only verifies
    that the listed members exist, not their types or signatures.
    """

    @property
    def baseline_source(self) -> str:
        """Baseline data source."""
        ...

    @property
    def current_source(self) -> str:
        """Current data source."""
        ...

    @property
    def has_drift(self) -> bool:
        """Whether drift was detected."""
        ...

    @property
    def has_high_drift(self) -> bool:
        """Whether high-severity drift was detected."""
        ...

    @property
    def drifted_columns(self) -> list[str]:
        """Columns with detected drift."""
        ...

    @property
    def columns(self) -> list[dict[str, Any]]:
        """Per-column drift results."""
        ...

    def to_dict(self) -> dict[str, Any]:
        """Convert the comparison result to a dictionary."""
        ...
368
+
369
+
370
@runtime_checkable
class IScanResult(Protocol):
    """Protocol for PII scan results.

    The protocol is ``runtime_checkable``: ``isinstance()`` only verifies
    that the listed members exist, not their types or signatures.
    """

    @property
    def source(self) -> str:
        """Data source name or path."""
        ...

    @property
    def columns_with_pii(self) -> int:
        """Number of columns with PII."""
        ...

    @property
    def total_findings(self) -> int:
        """Total PII findings."""
        ...

    @property
    def has_violations(self) -> bool:
        """Whether regulation violations were found."""
        ...

    @property
    def findings(self) -> list[dict[str, Any]]:
        """PII findings."""
        ...

    @property
    def violations(self) -> list[dict[str, Any]]:
        """Regulation violations."""
        ...

    def to_dict(self) -> dict[str, Any]:
        """Convert the scan result to a dictionary."""
        ...
407
+
408
+
409
@runtime_checkable
class IMaskResult(Protocol):
    """Protocol for data masking results.

    The protocol is ``runtime_checkable``: ``isinstance()`` only verifies
    that the listed members exist, not their types or signatures.
    """

    @property
    def source(self) -> str:
        """Original data source."""
        ...

    @property
    def output_path(self) -> str:
        """Path to the masked output file."""
        ...

    @property
    def columns_masked(self) -> list[str]:
        """Columns that were masked."""
        ...

    @property
    def strategy(self) -> str:
        """Masking strategy used."""
        ...

    def to_dict(self) -> dict[str, Any]:
        """Convert the mask result to a dictionary."""
        ...
436
+
437
+
438
@runtime_checkable
class IGenerateSuiteResult(Protocol):
    """Protocol for validation suite generation results.

    The protocol is ``runtime_checkable``: ``isinstance()`` only verifies
    that the listed members exist, not their types or signatures.
    """

    @property
    def rules(self) -> list[dict[str, Any]]:
        """Generated validation rules."""
        ...

    @property
    def rule_count(self) -> int:
        """Number of rules generated."""
        ...

    @property
    def yaml_content(self) -> str:
        """Rules as a YAML string."""
        ...

    def to_dict(self) -> dict[str, Any]:
        """Convert the suite result to a dictionary."""
        ...
460
+
461
+
462
+ # =============================================================================
463
+ # Main Backend Interface
464
+ # =============================================================================
465
+
466
+
467
@runtime_checkable
class IDataQualityBackend(Protocol):
    """Protocol for data quality backend implementations.

    This is the main interface that all data quality backends must implement.
    It provides methods for validation, profiling, schema learning, drift
    detection, PII scanning, and data masking.

    Every operation except ``is_available`` is declared ``async``, and all
    options after the data argument(s) are keyword-only.

    The protocol is ``runtime_checkable``: ``isinstance()`` only verifies
    that the methods exist, not their signatures or that they are coroutines.

    Example:
        class TruthoundBackend(IDataQualityBackend):
            async def check(self, data, **kwargs) -> ICheckResult:
                import truthound as th
                result = th.check(data, **kwargs)
                return convert_to_check_result(result)

        class MockBackend(IDataQualityBackend):
            async def check(self, data, **kwargs) -> ICheckResult:
                return MockCheckResult(passed=True, issues=[])
    """

    def is_available(self) -> bool:
        """Check if the backend is available (library installed).

        Returns:
            True if the backend library is installed and working.
        """
        ...

    async def check(
        self,
        data: DataInput,
        *,
        validators: list[str] | None = None,
        validator_config: dict[str, dict[str, Any]] | None = None,
        schema: str | None = None,
        auto_schema: bool = False,
        columns: list[str] | None = None,
        min_severity: str | None = None,
        strict: bool = False,
        parallel: bool = False,
        max_workers: int | None = None,
        pushdown: bool | None = None,
    ) -> ICheckResult:
        """Run data validation.

        Args:
            data: File path or DataSource object.
            validators: List of validator names to run.
            validator_config: Per-validator configuration.
            schema: Path to schema YAML file.
            auto_schema: Auto-learn schema for validation.
            columns: Columns to validate.
            min_severity: Minimum severity to report.
            strict: Raise exception on failures.
            parallel: Use parallel execution.
            max_workers: Max threads for parallel.
            pushdown: Enable query pushdown.

        Returns:
            Validation result implementing ICheckResult.
        """
        ...

    async def learn(
        self,
        source: DataInput,
        *,
        infer_constraints: bool = True,
        categorical_threshold: int | None = None,
        sample_size: int | None = None,
    ) -> ILearnResult:
        """Learn schema from data.

        Args:
            source: File path or DataSource object.
            infer_constraints: Infer constraints from statistics.
            categorical_threshold: Max unique values for categorical.
            sample_size: Number of rows to sample.

        Returns:
            Schema result implementing ILearnResult.
        """
        ...

    async def profile(
        self,
        source: DataInput,
        *,
        sample_size: int | None = None,
        include_patterns: bool = True,
        include_correlations: bool = False,
        include_distributions: bool = True,
        top_n_values: int = 10,
    ) -> IProfileResult:
        """Run data profiling.

        Args:
            source: File path or DataSource object.
            sample_size: Max rows to sample.
            include_patterns: Enable pattern detection.
            include_correlations: Calculate correlations.
            include_distributions: Include distribution stats.
            top_n_values: Top/bottom values per column.

        Returns:
            Profile result implementing IProfileResult.
        """
        ...

    async def compare(
        self,
        baseline: DataInput,
        current: DataInput,
        *,
        columns: list[str] | None = None,
        method: str = "auto",
        threshold: float | None = None,
        sample_size: int | None = None,
    ) -> ICompareResult:
        """Compare datasets for drift detection.

        Args:
            baseline: Reference data.
            current: Current data to compare.
            columns: Columns to compare.
            method: Detection method.
            threshold: Drift threshold.
            sample_size: Sample size for large datasets.

        Returns:
            Comparison result implementing ICompareResult.
        """
        ...

    async def scan(
        self,
        data: DataInput,
        *,
        columns: list[str] | None = None,
        regulations: list[str] | None = None,
        min_confidence: float = 0.8,
    ) -> IScanResult:
        """Scan for PII.

        Args:
            data: File path or DataSource object.
            columns: Columns to scan.
            regulations: Regulations to check.
            min_confidence: Minimum PII confidence.

        Returns:
            Scan result implementing IScanResult.
        """
        ...

    async def mask(
        self,
        data: DataInput,
        output: str,
        *,
        columns: list[str] | None = None,
        strategy: str = "redact",
    ) -> IMaskResult:
        """Mask sensitive data.

        Args:
            data: File path or DataSource object.
            output: Output file path.
            columns: Columns to mask.
            strategy: Masking strategy.

        Returns:
            Mask result implementing IMaskResult.
        """
        ...

    async def generate_suite(
        self,
        profile: IProfileResult | dict[str, Any],
        *,
        strictness: str = "medium",
        preset: str = "default",
        include: list[str] | None = None,
        exclude: list[str] | None = None,
    ) -> IGenerateSuiteResult:
        """Generate validation suite from profile.

        Args:
            profile: Profile result or dictionary.
            strictness: Rule strictness level.
            preset: Rule generation preset.
            include: Rule categories to include.
            exclude: Rule categories to exclude.

        Returns:
            Suite result implementing IGenerateSuiteResult.
        """
        ...