truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877)
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,1710 @@
1
+ """Incremental profiling validation framework.
2
+
3
+ This module provides comprehensive validation for incremental profiling:
4
+ - Change detection accuracy validation
5
+ - Fingerprint consistency validation
6
+ - Profile merge correctness validation
7
+ - Performance regression validation
8
+ - Data integrity validation
9
+
10
+ The framework is designed for high extensibility and maintainability:
11
+ - Protocol-based validators for easy extension
12
+ - Registry pattern for validator discovery
13
+ - Configurable validation strategies
14
+ - Detailed validation results with recommendations
15
+
16
+ Example:
17
+ from truthound.profiler.incremental_validation import (
18
+ IncrementalValidator,
19
+ ValidationRunner,
20
+ ValidationConfig,
21
+ )
22
+
23
+ # Create validator
24
+ validator = IncrementalValidator()
25
+
26
+ # Run validation
27
+ result = validator.validate(
28
+ original_profile=profile1,
29
+ incremental_profile=profile2,
30
+ data=df,
31
+ )
32
+
33
+ # Check results
34
+ if result.passed:
35
+ print("Validation passed!")
36
+ else:
37
+ for issue in result.issues:
38
+ print(f"{issue.severity}: {issue.message}")
39
+ """
40
+
41
+ from __future__ import annotations
42
+
43
+ import hashlib
44
+ import logging
45
+ import time
46
+ from abc import ABC, abstractmethod
47
+ from dataclasses import dataclass, field
48
+ from datetime import datetime, timedelta
49
+ from enum import Enum
50
+ from typing import Any, Callable, Protocol, Sequence, TypeVar
51
+
52
+ import polars as pl
53
+
54
+ from truthound.profiler.base import (
55
+ ColumnProfile,
56
+ TableProfile,
57
+ ProfilerConfig,
58
+ )
59
+ from truthound.profiler.incremental import (
60
+ ChangeReason,
61
+ ColumnFingerprint,
62
+ ChangeDetectionResult,
63
+ FingerprintCalculator,
64
+ IncrementalConfig,
65
+ IncrementalProfiler,
66
+ ProfileMerger,
67
+ )
68
+
69
+
70
+ # Set up logging
71
+ logger = logging.getLogger("truthound.profiler.incremental_validation")
72
+
73
+
74
+ # =============================================================================
75
+ # Validation Types
76
+ # =============================================================================
77
+
78
+
79
class ValidationSeverity(str, Enum):
    """Severity attached to a validation issue.

    Members subclass ``str``, so each one compares equal to its value.
    """

    # Informational message
    INFO = "info"
    # Potential issue
    WARNING = "warning"
    # Definite problem
    ERROR = "error"
    # Validation cannot proceed
    CRITICAL = "critical"
86
+
87
+
88
class ValidationCategory(str, Enum):
    """Area of incremental profiling that a validation check belongs to.

    Members subclass ``str``, so each one compares equal to its value.
    """

    CHANGE_DETECTION = "change_detection"
    FINGERPRINT = "fingerprint"
    PROFILE_MERGE = "profile_merge"
    DATA_INTEGRITY = "data_integrity"
    PERFORMANCE = "performance"
    CONSISTENCY = "consistency"
    SCHEMA = "schema"
98
+
99
+
100
class ValidationType(str, Enum):
    """Scope of a validation run.

    Members subclass ``str``, so each one compares equal to its value.
    """

    # Complete validation
    FULL = "full"
    # Fast essential checks
    QUICK = "quick"
    # Only change detection
    CHANGE_ONLY = "change_only"
    # Only merge validation
    MERGE_ONLY = "merge_only"
107
+
108
+
109
+ # =============================================================================
110
+ # Validation Results
111
+ # =============================================================================
112
+
113
+
114
@dataclass
class ValidationIssue:
    """A single validation issue found during checks.

    Attributes:
        category: Category of the issue
        severity: How severe the issue is
        message: Human-readable description
        column_name: Affected column (if applicable)
        expected: Expected value
        actual: Actual value found
        recommendation: Suggested fix
        metadata: Additional context
    """

    category: ValidationCategory
    severity: ValidationSeverity
    message: str
    column_name: str | None = None
    expected: Any = None
    actual: Any = None
    recommendation: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Convert the issue to a plain dictionary.

        ``category`` and ``severity`` are emitted as their enum values.
        ``expected`` and ``actual`` are passed through ``str()`` unless they
        are ``None``, in which case ``None`` is kept.

        Returns:
            A new dictionary describing this issue.
        """
        return {
            "category": self.category.value,
            "severity": self.severity.value,
            "message": self.message,
            "column_name": self.column_name,
            "expected": str(self.expected) if self.expected is not None else None,
            "actual": str(self.actual) if self.actual is not None else None,
            "recommendation": self.recommendation,
            # Shallow copy: editing the serialized form must not alter the issue.
            "metadata": dict(self.metadata),
        }
150
+
151
+
152
@dataclass
class ValidationMetrics:
    """Metrics from validation run.

    Attributes:
        total_checks: Total number of checks performed
        passed_checks: Number of checks that passed
        failed_checks: Number of checks that failed
        skipped_checks: Number of checks skipped
        duration_ms: Total validation time
        columns_validated: Number of columns validated
        changes_detected: Number of changes detected
        false_positives: Estimated false positive count
        false_negatives: Estimated false negative count
    """

    total_checks: int = 0
    passed_checks: int = 0
    failed_checks: int = 0
    skipped_checks: int = 0
    duration_ms: float = 0.0
    columns_validated: int = 0
    changes_detected: int = 0
    false_positives: int = 0
    false_negatives: int = 0

    @property
    def pass_rate(self) -> float:
        """Fraction of checks that passed; 1.0 when no checks were run."""
        if self.total_checks == 0:
            return 1.0
        return self.passed_checks / self.total_checks

    @property
    def accuracy(self) -> float:
        """Change detection accuracy; 1.0 when no columns were validated.

        Computed as ``(columns - errors) / columns`` where errors is the sum
        of false positives and false negatives. Both counts are estimates and
        can together exceed ``columns_validated``, so the result is floored
        at 0.0 instead of going negative.
        """
        total = self.columns_validated
        if total == 0:
            return 1.0
        errors = self.false_positives + self.false_negatives
        return max(0.0, (total - errors) / total)

    def to_dict(self) -> dict[str, Any]:
        """Convert to a dictionary, including the derived rate properties."""
        return {
            "total_checks": self.total_checks,
            "passed_checks": self.passed_checks,
            "failed_checks": self.failed_checks,
            "skipped_checks": self.skipped_checks,
            "pass_rate": self.pass_rate,
            "duration_ms": self.duration_ms,
            "columns_validated": self.columns_validated,
            "changes_detected": self.changes_detected,
            "false_positives": self.false_positives,
            "false_negatives": self.false_negatives,
            "accuracy": self.accuracy,
        }
209
+
210
+
211
@dataclass
class ValidationResult:
    """Complete result of a validation run.

    Attributes:
        passed: Whether validation passed overall
        validation_type: Type of validation performed
        issues: List of issues found
        metrics: Validation metrics
        validated_at: When validation was performed
        config: Configuration used
        details: Additional details per category
    """

    passed: bool
    validation_type: ValidationType
    issues: list[ValidationIssue] = field(default_factory=list)
    metrics: ValidationMetrics = field(default_factory=ValidationMetrics)
    validated_at: datetime = field(default_factory=datetime.now)
    config: dict[str, Any] = field(default_factory=dict)
    details: dict[ValidationCategory, dict[str, Any]] = field(default_factory=dict)

    @property
    def error_count(self) -> int:
        """Count of error-level issues."""
        return sum(1 for i in self.issues if i.severity == ValidationSeverity.ERROR)

    @property
    def warning_count(self) -> int:
        """Count of warning-level issues."""
        return sum(1 for i in self.issues if i.severity == ValidationSeverity.WARNING)

    @property
    def critical_count(self) -> int:
        """Count of critical issues."""
        return sum(1 for i in self.issues if i.severity == ValidationSeverity.CRITICAL)

    def get_issues_by_category(
        self,
        category: ValidationCategory,
    ) -> list[ValidationIssue]:
        """Return the issues whose category equals ``category``, in order."""
        return [i for i in self.issues if i.category == category]

    def get_issues_by_severity(
        self,
        severity: ValidationSeverity,
    ) -> list[ValidationIssue]:
        """Return the issues whose severity equals ``severity``, in order."""
        return [i for i in self.issues if i.severity == severity]

    def to_dict(self) -> dict[str, Any]:
        """Convert to a dictionary.

        Enum fields are emitted as their values, ``validated_at`` as an ISO
        string, and ``details`` is re-keyed by category value.
        """
        return {
            "passed": self.passed,
            "validation_type": self.validation_type.value,
            "error_count": self.error_count,
            "warning_count": self.warning_count,
            "critical_count": self.critical_count,
            "issues": [i.to_dict() for i in self.issues],
            "metrics": self.metrics.to_dict(),
            "validated_at": self.validated_at.isoformat(),
            "config": self.config,
            "details": {k.value: v for k, v in self.details.items()},
        }

    def to_markdown(self) -> str:
        """Generate a markdown report.

        The report has a status header and a summary of the metrics. When
        issues exist, an "Issues Found" section follows with one subsection
        per severity that has issues, ordered critical, error, warning, info.

        Returns:
            The report as a single newline-joined string.
        """
        lines = [
            "# Incremental Profiling Validation Report",
            "",
            f"**Status**: {'✅ PASSED' if self.passed else '❌ FAILED'}",
            f"**Validation Type**: {self.validation_type.value}",
            f"**Validated At**: {self.validated_at.isoformat()}",
            "",
            "## Summary",
            "",
            f"- Total Checks: {self.metrics.total_checks}",
            f"- Passed: {self.metrics.passed_checks}",
            f"- Failed: {self.metrics.failed_checks}",
            f"- Pass Rate: {self.metrics.pass_rate:.1%}",
            f"- Duration: {self.metrics.duration_ms:.1f}ms",
            "",
        ]

        if not self.issues:
            return "\n".join(lines)

        lines.extend(["## Issues Found", ""])

        # Most severe first so the report leads with what blocks validation.
        for severity in (
            ValidationSeverity.CRITICAL,
            ValidationSeverity.ERROR,
            ValidationSeverity.WARNING,
            ValidationSeverity.INFO,
        ):
            issues = self.get_issues_by_severity(severity)
            if not issues:
                continue
            lines.append(f"### {severity.value.title()} ({len(issues)})")
            lines.append("")
            for issue in issues:
                col_info = f" [{issue.column_name}]" if issue.column_name else ""
                lines.append(f"- **{issue.category.value}**{col_info}: {issue.message}")
                if issue.recommendation:
                    # Two-space indent nests the recommendation under its
                    # issue bullet; a one-space indent renders as a sibling.
                    lines.append(f"  - *Recommendation*: {issue.recommendation}")
            lines.append("")

        return "\n".join(lines)
320
+
321
+
322
+ # =============================================================================
323
+ # Validation Protocol and Base Classes
324
+ # =============================================================================
325
+
326
+
327
class ValidatorProtocol(Protocol):
    """Structural interface that every validator is expected to satisfy."""

    @property
    def name(self) -> str:
        """Name identifying the validator."""
        ...

    @property
    def category(self) -> ValidationCategory:
        """Category under which the validator's issues are reported."""
        ...

    def validate(
        self,
        context: "ValidationContext",
    ) -> list[ValidationIssue]:
        """Run the validator against a context.

        Args:
            context: Validation context with data and profiles

        Returns:
            The issues found; an empty list means validation passed.
        """
        ...
353
+
354
+
355
@dataclass
class ValidationContext:
    """Bundle of inputs handed to validators.

    Only ``data`` is required. Profiles and the configuration default to
    ``None``; every collection field defaults to a fresh empty container.
    """

    # --- Data under validation ---
    data: pl.LazyFrame | pl.DataFrame

    # --- Profiles ---
    original_profile: TableProfile | None = None
    incremental_profile: TableProfile | None = None
    full_profile: TableProfile | None = None  # For comparison

    # --- Fingerprints, keyed by string ---
    original_fingerprints: dict[str, ColumnFingerprint] = field(default_factory=dict)
    current_fingerprints: dict[str, ColumnFingerprint] = field(default_factory=dict)

    # --- Change detection results ---
    change_results: dict[str, ChangeDetectionResult] = field(default_factory=dict)

    # --- Profiling metadata ---
    profiled_columns: set[str] = field(default_factory=set)
    skipped_columns: set[str] = field(default_factory=set)
    change_reasons: dict[str, ChangeReason] = field(default_factory=dict)

    # --- Configuration ---
    config: "ValidationConfig | None" = None
384
+
385
+
386
class BaseValidator(ABC):
    """Abstract foundation shared by all validators.

    Keeps an ``enabled`` flag and an internal list of collected issues, and
    offers helpers to record an issue and to clear that list.
    """

    def __init__(self, enabled: bool = True):
        self.enabled = enabled
        self._issues: list[ValidationIssue] = []

    @property
    @abstractmethod
    def name(self) -> str:
        """Name identifying the validator."""
        ...

    @property
    @abstractmethod
    def category(self) -> ValidationCategory:
        """Category stamped on every issue this validator records."""
        ...

    @abstractmethod
    def validate(self, context: ValidationContext) -> list[ValidationIssue]:
        """Run the validation and return the issues found."""
        ...

    def add_issue(
        self,
        severity: ValidationSeverity,
        message: str,
        column_name: str | None = None,
        expected: Any = None,
        actual: Any = None,
        recommendation: str | None = None,
        **metadata: Any,
    ) -> None:
        """Record one issue under this validator's category.

        Any extra keyword arguments are stored as the issue's metadata.
        """
        issue = ValidationIssue(
            category=self.category,
            severity=severity,
            message=message,
            column_name=column_name,
            expected=expected,
            actual=actual,
            recommendation=recommendation,
            metadata=metadata,
        )
        self._issues.append(issue)

    def reset(self) -> None:
        """Drop all recorded issues by starting a new, empty list."""
        # Rebind rather than clear in place: a list handed out earlier
        # must keep its contents.
        self._issues = list()
438
+
439
+
440
+ # =============================================================================
441
+ # Change Detection Validators
442
+ # =============================================================================
443
+
444
+
445
class ChangeDetectionAccuracyValidator(BaseValidator):
    """Check incremental change detection against a full re-profile.

    Column statistics of the incremental profile are compared with those of
    the full profile. Mismatches on skipped columns are reported as false
    negatives; re-profiled columns that show no mismatch are reported as
    potential false positives.
    """

    def __init__(
        self,
        tolerance: float = 0.01,
        enabled: bool = True,
    ):
        """Initialize validator.

        Args:
            tolerance: Largest absolute null-ratio difference still treated
                as equal
            enabled: Whether validator is enabled
        """
        super().__init__(enabled)
        self.tolerance = tolerance

    @property
    def name(self) -> str:
        return "change_detection_accuracy"

    @property
    def category(self) -> ValidationCategory:
        return ValidationCategory.CHANGE_DETECTION

    def validate(self, context: ValidationContext) -> list[ValidationIssue]:
        """Compare the incremental profile in ``context`` with the full one.

        Returns the issues collected during this run. When either profile is
        missing, a single issue is reported and no comparison takes place.
        """
        self.reset()

        incremental = context.incremental_profile
        if incremental is None:
            self.add_issue(
                ValidationSeverity.ERROR,
                "No incremental profile provided for validation",
                recommendation="Provide an incremental profile to validate",
            )
            return self._issues

        full = context.full_profile
        if full is None:
            self.add_issue(
                ValidationSeverity.WARNING,
                "No full profile for comparison; accuracy cannot be fully validated",
                recommendation="Provide a full profile for accurate comparison",
            )
            return self._issues

        incremental_by_name = {column.name: column for column in incremental.columns}
        full_by_name = {column.name: column for column in full.columns}

        # Columns the full profile has but the incremental one lost
        for column_name in full_by_name:
            if column_name in incremental_by_name:
                continue
            self.add_issue(
                ValidationSeverity.ERROR,
                "Column missing from incremental profile",
                column_name=column_name,
                recommendation="Check if column was incorrectly skipped",
            )

        # Columns only the incremental profile knows about
        for column_name in incremental_by_name:
            if column_name in full_by_name:
                continue
            self.add_issue(
                ValidationSeverity.WARNING,
                "Column in incremental but not in full profile",
                column_name=column_name,
            )

        # Columns present on both sides get a statistic-by-statistic comparison
        for column_name, incremental_column in incremental_by_name.items():
            if column_name in full_by_name:
                self._compare_columns(
                    column_name,
                    incremental_column,
                    full_by_name[column_name],
                    context,
                )

        return self._issues

    def _compare_columns(
        self,
        col_name: str,
        inc_col: ColumnProfile,
        full_col: ColumnProfile,
        context: ValidationContext,
    ) -> None:
        """Compare incremental and full profile for a column."""
        skipped = col_name in context.skipped_columns
        reprofiled = col_name in context.profiled_columns

        # A skipped column is carried over, so its row count should still
        # equal the one recorded in the original profile.
        if skipped and context.original_profile:
            originals = {
                column.name: column for column in context.original_profile.columns
            }
            if col_name in originals:
                baseline = originals[col_name]
                if inc_col.row_count != baseline.row_count:
                    self.add_issue(
                        ValidationSeverity.ERROR,
                        "Skipped column has different row count from original",
                        column_name=col_name,
                        expected=baseline.row_count,
                        actual=inc_col.row_count,
                        recommendation="Column may have changed but was not detected",
                    )

        # Which statistics disagree with the full profile; only the null
        # ratio is compared with a tolerance, the counts must match exactly.
        differs = {
            "row_count": inc_col.row_count != full_col.row_count,
            "null_count": inc_col.null_count != full_col.null_count,
            "null_ratio": abs(inc_col.null_ratio - full_col.null_ratio) > self.tolerance,
            "distinct_count": inc_col.distinct_count != full_col.distinct_count,
        }
        mismatches = [
            (field_name, getattr(inc_col, field_name), getattr(full_col, field_name))
            for field_name, is_different in differs.items()
            if is_different
        ]

        if mismatches and skipped:
            # False negative - the column changed but was not re-profiled
            for field_name, inc_val, full_val in mismatches:
                self.add_issue(
                    ValidationSeverity.ERROR,
                    f"Change in {field_name} not detected (false negative)",
                    column_name=col_name,
                    expected=full_val,
                    actual=inc_val,
                    recommendation="Increase change detection sensitivity",
                    false_negative=True,
                )
            return

        if mismatches or not reprofiled:
            return

        # Re-profiled without any difference: possibly a false positive.
        # New and forced columns are re-profiled on purpose, so they are exempt.
        if col_name in context.change_reasons:
            reason = context.change_reasons[col_name]
            if reason not in [ChangeReason.NEW_COLUMN, ChangeReason.FORCED]:
                # Informational only, since re-profiling is safe
                self.add_issue(
                    ValidationSeverity.INFO,
                    "Column was re-profiled but no differences found (potential false positive)",
                    column_name=col_name,
                    metadata={"reason": reason.value},
                    false_positive=True,
                )
598
+
599
+
600
class SchemaChangeValidator(BaseValidator):
    """Validates schema change detection.

    The dtype of every column in the current data is compared with the
    ``physical_type`` stored in the original profile; each difference is
    checked against the recorded change reasons.
    """

    @property
    def name(self) -> str:
        return "schema_change"

    @property
    def category(self) -> ValidationCategory:
        return ValidationCategory.SCHEMA

    def validate(self, context: ValidationContext) -> list[ValidationIssue]:
        """Report dtype changes and whether each one was detected."""
        self.reset()

        data = context.data
        if not isinstance(data, (pl.LazyFrame, pl.DataFrame)):
            self.add_issue(
                ValidationSeverity.ERROR,
                "Invalid data type for validation",
            )
            return self._issues

        lazy_frame = data.lazy() if isinstance(data, pl.DataFrame) else data
        current_schema = lazy_frame.collect_schema()

        # Without an original profile there is nothing to compare against
        if not context.original_profile:
            return self._issues

        previous = {column.name: column for column in context.original_profile.columns}
        for col_name, dtype in current_schema.items():
            if col_name not in previous:
                continue

            orig_type = previous[col_name].physical_type
            current_type = str(dtype)
            if orig_type != current_type:
                self._report_type_change(context, col_name, orig_type, current_type)

        return self._issues

    def _report_type_change(self, context, col_name, orig_type, current_type) -> None:
        """Record a changed column type as detected (INFO) or missed (ERROR)."""
        detected = (
            context.change_reasons.get(col_name) == ChangeReason.SCHEMA_CHANGED
        )

        if detected:
            self.add_issue(
                ValidationSeverity.INFO,
                "Schema change correctly detected",
                column_name=col_name,
                expected=orig_type,
                actual=current_type,
            )
            return

        self.add_issue(
            ValidationSeverity.ERROR,
            "Schema change not detected",
            column_name=col_name,
            expected=orig_type,
            actual=current_type,
            recommendation="Enable schema change detection",
        )
659
+
660
+
661
class StalenessValidator(BaseValidator):
    """Validates staleness detection.

    A column of the original profile is stale when its ``profiled_at``
    timestamp is older than the effective maximum age. Stale columns that
    were not flagged with ``ChangeReason.STALE`` are reported as warnings.
    """

    @property
    def name(self) -> str:
        return "staleness"

    @property
    def category(self) -> ValidationCategory:
        return ValidationCategory.CHANGE_DETECTION

    def __init__(
        self,
        max_age: timedelta | None = None,
        enabled: bool = True,
    ):
        """Initialize validator.

        Args:
            max_age: Maximum profile age, used when the validation config
                does not specify ``max_profile_age``
            enabled: Whether validator is enabled
        """
        super().__init__(enabled)
        self.max_age = max_age

    def _effective_max_age(self, context: ValidationContext) -> timedelta | None:
        """Return the config's max age, falling back to this validator's own."""
        config = context.config
        configured = config.max_profile_age if config else None
        # A config that leaves max_profile_age unset must not silently
        # disable a max_age that was given to the validator directly.
        return configured if configured is not None else self.max_age

    def validate(self, context: ValidationContext) -> list[ValidationIssue]:
        """Report stale columns whose staleness was not detected.

        Returns an empty list when there is no original profile or no
        maximum age is configured.
        """
        self.reset()

        if context.original_profile is None:
            return self._issues

        max_age = self._effective_max_age(context)
        if max_age is None:
            return self._issues

        for col in context.original_profile.columns:
            profiled_at = col.profiled_at
            # Take "now" in the timestamp's own timezone: naive timestamps
            # keep the original naive-local behaviour, while aware ones no
            # longer raise TypeError on subtraction.
            age = datetime.now(profiled_at.tzinfo) - profiled_at
            if age <= max_age:
                continue

            was_detected = (
                context.change_reasons.get(col.name) == ChangeReason.STALE
            )
            if not was_detected:
                self.add_issue(
                    ValidationSeverity.WARNING,
                    "Stale column not re-profiled",
                    column_name=col.name,
                    expected=f"Re-profile after {max_age}",
                    actual=f"Age: {age}",
                    recommendation="Check staleness configuration",
                )

        return self._issues
715
+
716
+
717
+ # =============================================================================
718
+ # Fingerprint Validators
719
+ # =============================================================================
720
+
721
+
722
class FingerprintConsistencyValidator(BaseValidator):
    """Validates fingerprint consistency and correctness.

    Each stored fingerprint is recalculated from the data and compared with
    the stored one on ``sample_hash`` and ``row_count``.
    """

    @property
    def name(self) -> str:
        return "fingerprint_consistency"

    @property
    def category(self) -> ValidationCategory:
        return ValidationCategory.FINGERPRINT

    def validate(self, context: ValidationContext) -> list[ValidationIssue]:
        """Recalculate every current fingerprint and report differences."""
        self.reset()

        stored_fingerprints = context.current_fingerprints
        if not stored_fingerprints:
            self.add_issue(
                ValidationSeverity.INFO,
                "No fingerprints to validate",
            )
            return self._issues

        # Same data should produce the same fingerprint
        calculator = FingerprintCalculator()

        data = context.data
        lazy_frame = data.lazy() if isinstance(data, pl.DataFrame) else data

        for col_name, stored in stored_fingerprints.items():
            try:
                fresh = calculator.calculate(lazy_frame, col_name)

                if fresh.sample_hash != stored.sample_hash:
                    self.add_issue(
                        ValidationSeverity.ERROR,
                        "Fingerprint not stable (different hashes for same data)",
                        column_name=col_name,
                        expected=stored.sample_hash,
                        actual=fresh.sample_hash,
                        recommendation="Check fingerprint calculation determinism",
                    )

                if fresh.row_count != stored.row_count:
                    self.add_issue(
                        ValidationSeverity.ERROR,
                        "Fingerprint row count mismatch",
                        column_name=col_name,
                        expected=stored.row_count,
                        actual=fresh.row_count,
                    )

            except Exception as e:
                # A failing column is reported as an issue so the remaining
                # columns are still checked.
                self.add_issue(
                    ValidationSeverity.ERROR,
                    f"Failed to calculate fingerprint: {e}",
                    column_name=col_name,
                )

        return self._issues
785
+
786
+
787
class FingerprintSensitivityValidator(BaseValidator):
    """Validates fingerprint sensitivity to changes.

    Columns whose original and current fingerprints differ count as actual
    changes; a change counts as detected when the column was re-profiled.
    """

    def __init__(
        self,
        min_change_detection_rate: float = 0.95,
        enabled: bool = True,
    ):
        """Initialize validator.

        Args:
            min_change_detection_rate: Minimum rate for detecting actual changes
            enabled: Whether validator is enabled
        """
        super().__init__(enabled)
        self.min_change_detection_rate = min_change_detection_rate

    @property
    def name(self) -> str:
        return "fingerprint_sensitivity"

    @property
    def category(self) -> ValidationCategory:
        return ValidationCategory.FINGERPRINT

    def validate(self, context: ValidationContext) -> list[ValidationIssue]:
        """Report undetected changes and a detection rate below the minimum."""
        self.reset()

        originals = context.original_fingerprints
        currents = context.current_fingerprints
        if not originals or not currents:
            self.add_issue(
                ValidationSeverity.INFO,
                "Need both original and current fingerprints for sensitivity validation",
            )
            return self._issues

        # Columns present on both sides whose fingerprints differ
        changed = []
        for col_name, curr_fp in currents.items():
            if col_name not in originals:
                continue
            orig_fp = originals[col_name]
            unchanged = (
                orig_fp.row_count == curr_fp.row_count
                and orig_fp.null_count == curr_fp.null_count
                and orig_fp.sample_hash == curr_fp.sample_hash
            )
            if not unchanged:
                changed.append((col_name, orig_fp, curr_fp))

        # A changed column that was not re-profiled went undetected
        undetected = [
            entry for entry in changed if entry[0] not in context.profiled_columns
        ]
        for col_name, orig_fp, curr_fp in undetected:
            self.add_issue(
                ValidationSeverity.WARNING,
                "Data change not detected by fingerprint",
                column_name=col_name,
                metadata={
                    "orig_hash": orig_fp.sample_hash,
                    "curr_hash": curr_fp.sample_hash,
                },
            )

        if changed:
            detection_rate = (len(changed) - len(undetected)) / len(changed)
            if detection_rate < self.min_change_detection_rate:
                self.add_issue(
                    ValidationSeverity.ERROR,
                    "Change detection rate below threshold",
                    expected=f">= {self.min_change_detection_rate:.0%}",
                    actual=f"{detection_rate:.0%}",
                    recommendation="Adjust fingerprint sensitivity settings",
                )

        return self._issues
871
+
872
+
873
+ # =============================================================================
874
+ # Profile Merge Validators
875
+ # =============================================================================
876
+
877
+
878
class ProfileMergeValidator(BaseValidator):
    """Validates profile merge correctness.

    Runs only when the context carries non-empty ``merge_inputs`` and a
    ``merge_output``; otherwise no issues are produced.
    """

    @property
    def name(self) -> str:
        return "profile_merge"

    @property
    def category(self) -> ValidationCategory:
        return ValidationCategory.PROFILE_MERGE

    def validate(self, context: ValidationContext) -> list[ValidationIssue]:
        """Validate profile merge operations."""
        self.reset()

        # Merge test data is optional on the context; a missing attribute,
        # empty inputs or a None output all mean there is nothing to check.
        merge_inputs = getattr(context, "merge_inputs", None)
        merge_output = getattr(context, "merge_output", None)
        if not merge_inputs or merge_output is None:
            return self._issues

        for check in (
            self._validate_column_preservation,
            self._validate_row_count,
            self._validate_latest_wins,
        ):
            check(merge_inputs, merge_output)

        return self._issues

    def _validate_column_preservation(
        self,
        inputs: list[TableProfile],
        output: TableProfile,
    ) -> None:
        """Check all input columns appear in output."""
        input_names = {
            column.name for profile in inputs for column in profile.columns
        }
        output_names = {column.name for column in output.columns}

        missing = input_names - output_names
        if not missing:
            return

        self.add_issue(
            ValidationSeverity.ERROR,
            f"Columns lost during merge: {missing}",
            recommendation="Check merge logic for column preservation",
        )

    def _validate_row_count(
        self,
        inputs: list[TableProfile],
        output: TableProfile,
    ) -> None:
        """Warn when the merged row count differs from the sum of the inputs."""
        expected_rows = sum(profile.row_count for profile in inputs)
        if output.row_count == expected_rows:
            return

        self.add_issue(
            ValidationSeverity.WARNING,
            "Merged row count doesn't match sum of inputs",
            expected=expected_rows,
            actual=output.row_count,
            recommendation="This may be expected if merging overlapping data",
        )

    def _validate_latest_wins(
        self,
        inputs: list[TableProfile],
        output: TableProfile,
    ) -> None:
        """Validate that latest profile wins for duplicates."""
        # Walking the inputs oldest-first lets later profiles overwrite
        # earlier entries, leaving the newest column profile per name.
        latest_by_name = {
            column.name: column
            for profile in sorted(inputs, key=lambda p: p.profiled_at)
            for column in profile.columns
        }
        merged_by_name = {column.name: column for column in output.columns}

        for col_name, expected_col in latest_by_name.items():
            if col_name not in merged_by_name:
                continue

            actual_col = merged_by_name[col_name]
            if actual_col.profiled_at != expected_col.profiled_at:
                self.add_issue(
                    ValidationSeverity.WARNING,
                    "Merged column doesn't use latest profile",
                    column_name=col_name,
                    expected=expected_col.profiled_at.isoformat(),
                    actual=actual_col.profiled_at.isoformat(),
                )
979
+
980
+
981
+ # =============================================================================
982
+ # Data Integrity Validators
983
+ # =============================================================================
984
+
985
+
986
class DataIntegrityValidator(BaseValidator):
    """Validates data integrity in profiles.

    Compares the row count, column count and per-column null counts stored
    in a profile with the values computed from the actual data.
    """

    @property
    def name(self) -> str:
        return "data_integrity"

    @property
    def category(self) -> ValidationCategory:
        return ValidationCategory.DATA_INTEGRITY

    def validate(self, context: ValidationContext) -> list[ValidationIssue]:
        """Validate data integrity.

        The incremental profile is checked when present, otherwise the full
        profile. Returns the issues collected during this run.
        """
        self.reset()

        profile = context.incremental_profile or context.full_profile
        if profile is None:
            self.add_issue(
                ValidationSeverity.ERROR,
                "No profile to validate",
            )
            return self._issues

        lf = (
            context.data.lazy()
            if isinstance(context.data, pl.DataFrame)
            else context.data
        )

        actual_row_count = lf.select(pl.len()).collect().item()

        # Row count check
        if profile.row_count != actual_row_count:
            self.add_issue(
                ValidationSeverity.ERROR,
                "Profile row count doesn't match data",
                expected=actual_row_count,
                actual=profile.row_count,
                recommendation="Profile may be stale",
            )

        # Column count check
        schema = lf.collect_schema()
        if profile.column_count != len(schema):
            self.add_issue(
                ValidationSeverity.ERROR,
                "Profile column count doesn't match data",
                expected=len(schema),
                actual=profile.column_count,
            )

        # Compute every null count in ONE query instead of one collect() per
        # column, which would re-execute the lazy plan for each column.
        # dict.fromkeys de-duplicates names while keeping their order.
        present = list(
            dict.fromkeys(
                col.name for col in profile.columns if col.name in schema
            )
        )
        null_counts = (
            lf.select(pl.col(present).null_count()).collect().row(0, named=True)
            if present
            else {}
        )

        # Check each column
        for col in profile.columns:
            if col.name not in schema:
                self.add_issue(
                    ValidationSeverity.ERROR,
                    "Profile column not found in data",
                    column_name=col.name,
                    recommendation="Schema may have changed",
                )
                continue

            # Verify null count
            actual_nulls = null_counts[col.name]
            if col.null_count != actual_nulls:
                self.add_issue(
                    ValidationSeverity.ERROR,
                    "Null count mismatch",
                    column_name=col.name,
                    expected=actual_nulls,
                    actual=col.null_count,
                )

        return self._issues
1060
+
1061
+
1062
+ # =============================================================================
1063
+ # Performance Validators
1064
+ # =============================================================================
1065
+
1066
+
1067
class PerformanceValidator(BaseValidator):
    """Validates performance improvements from incremental profiling.

    Compares ``profile_duration_ms`` of the incremental profile with that of
    the full profile.
    """

    @property
    def name(self) -> str:
        return "performance"

    @property
    def category(self) -> ValidationCategory:
        return ValidationCategory.PERFORMANCE

    def __init__(
        self,
        min_speedup: float = 1.0,
        max_overhead: float = 0.2,
        enabled: bool = True,
    ):
        """Initialize validator.

        Args:
            min_speedup: Minimum expected speedup ratio
            max_overhead: Maximum acceptable overhead ratio
            enabled: Whether validator is enabled
        """
        super().__init__(enabled)
        self.min_speedup = min_speedup
        self.max_overhead = max_overhead

    def validate(self, context: ValidationContext) -> list[ValidationIssue]:
        """Validate performance.

        Nothing is checked when either profile is missing or the full
        profile reports a duration of zero. If columns were skipped, the
        speedup must reach ``min_speedup``; otherwise the relative overhead
        must stay within ``max_overhead``.
        """
        self.reset()

        inc_profile = context.incremental_profile
        full_profile = context.full_profile

        if inc_profile is None or full_profile is None:
            return self._issues

        inc_duration = inc_profile.profile_duration_ms
        full_duration = full_profile.profile_duration_ms

        # No usable baseline to compare against
        if full_duration == 0:
            return self._issues

        columns_skipped = len(context.skipped_columns)

        if columns_skipped > 0:
            # Should see improvement if columns were skipped. A zero (or
            # negative) incremental duration counts as an infinite speedup.
            speedup = full_duration / inc_duration if inc_duration > 0 else float("inf")
            if speedup < self.min_speedup:
                self.add_issue(
                    ValidationSeverity.WARNING,
                    "Incremental profiling slower than expected",
                    expected=f">= {self.min_speedup:.1f}x speedup",
                    actual=f"{speedup:.2f}x",
                    metadata={
                        "columns_skipped": columns_skipped,
                        "incremental_ms": inc_duration,
                        "full_ms": full_duration,
                    },
                )
        else:
            # If nothing skipped, check overhead isn't too high. The zero
            # case already returned above; the guard still maps a negative
            # full duration to "no overhead".
            overhead = (inc_duration - full_duration) / full_duration if full_duration > 0 else 0
            if overhead > self.max_overhead:
                self.add_issue(
                    ValidationSeverity.WARNING,
                    "Incremental overhead too high when nothing skipped",
                    expected=f"<= {self.max_overhead:.0%} overhead",
                    actual=f"{overhead:.0%}",
                    recommendation="Check fingerprint calculation efficiency",
                )

        return self._issues
1142
+
1143
+
1144
+ # =============================================================================
1145
+ # Validator Registry
1146
+ # =============================================================================
1147
+
1148
+
1149
class ValidatorRegistry:
    """Registry for validators.

    Allows dynamic registration and discovery of validators. Classes are
    stored by name; one instance per name is created on first request and
    cached for later requests.
    """

    def __init__(self):
        self._validators: dict[str, type[BaseValidator]] = {}
        self._instances: dict[str, BaseValidator] = {}

    def register(
        self,
        name: str,
        validator_class: "type[BaseValidator]",
    ) -> None:
        """Register a validator class under ``name``.

        Registering a different class under an existing name replaces the
        previous one and drops its cached instance, so the next ``get``
        builds an instance of the new class.
        """
        if self._validators.get(name) is not validator_class:
            # A cached instance of the old class would otherwise keep being
            # returned by get() after the replacement.
            self._instances.pop(name, None)
        self._validators[name] = validator_class

    def get(self, name: str, **kwargs: Any) -> "BaseValidator":
        """Get or create a validator instance.

        Args:
            name: Registered validator name
            **kwargs: Constructor arguments, used only when the instance is
                created; they are ignored once an instance is cached

        Raises:
            KeyError: If no validator is registered under ``name``
        """
        if name not in self._instances:
            try:
                validator_class = self._validators[name]
            except KeyError:
                raise KeyError(f"Unknown validator: {name}") from None
            self._instances[name] = validator_class(**kwargs)
        return self._instances[name]

    def get_all(self, **kwargs: Any) -> "list[BaseValidator]":
        """Get all registered validators, in registration order."""
        return [
            self.get(name, **kwargs)
            for name in self._validators
        ]

    def get_by_category(
        self,
        category: "ValidationCategory",
        **kwargs: Any,
    ) -> "list[BaseValidator]":
        """Get validators for a category."""
        return [
            v for v in self.get_all(**kwargs)
            if v.category == category
        ]

    @property
    def registered_names(self) -> list[str]:
        """Get names of registered validators."""
        return list(self._validators)
1197
+
1198
+
1199
# Global registry
validator_registry = ValidatorRegistry()

# Register built-in validators (in this order)
for _builtin_name, _builtin_class in (
    ("change_detection_accuracy", ChangeDetectionAccuracyValidator),
    ("schema_change", SchemaChangeValidator),
    ("staleness", StalenessValidator),
    ("fingerprint_consistency", FingerprintConsistencyValidator),
    ("fingerprint_sensitivity", FingerprintSensitivityValidator),
    ("profile_merge", ProfileMergeValidator),
    ("data_integrity", DataIntegrityValidator),
    ("performance", PerformanceValidator),
):
    validator_registry.register(_builtin_name, _builtin_class)
# Keep the loop variables out of the module namespace
del _builtin_name, _builtin_class
1211
+
1212
+
1213
def register_validator(name: str) -> Callable[[type[BaseValidator]], type[BaseValidator]]:
    """Build a class decorator that registers the class under ``name``.

    The decorated class is added to the global ``validator_registry`` and
    returned unchanged.
    """
    def _register(validator_class: type[BaseValidator]) -> type[BaseValidator]:
        validator_registry.register(name, validator_class)
        return validator_class

    return _register
1219
+
1220
+
1221
+ # =============================================================================
1222
+ # Validation Configuration
1223
+ # =============================================================================
1224
+
1225
+
1226
@dataclass
class ValidationConfig:
    """Settings that control a validation run.

    Attributes:
        validation_type: Type of validation to perform
        enabled_validators: Names of the validators to run; when left as
            None (or empty) every registered validator is eligible
        disabled_validators: Names of validators to skip
        max_profile_age: Maximum profile age for staleness checks
        tolerance: Tolerance for numerical comparisons
        fail_on_warning: Whether warnings should fail validation
        fail_on_error: Whether errors should fail validation
        collect_all_issues: Collect all issues or stop on first failure
    """

    validation_type: ValidationType = ValidationType.FULL
    enabled_validators: set[str] | None = None
    disabled_validators: set[str] = field(default_factory=set)
    max_profile_age: timedelta | None = None
    tolerance: float = 0.01
    fail_on_warning: bool = False
    fail_on_error: bool = True
    collect_all_issues: bool = True

    @classmethod
    def quick(cls) -> "ValidationConfig":
        """Build a config that runs only the accuracy and integrity validators."""
        selected = {"change_detection_accuracy", "data_integrity"}
        return cls(
            validation_type=ValidationType.QUICK,
            enabled_validators=selected,
        )

    @classmethod
    def strict(cls) -> "ValidationConfig":
        """Build a full-validation config that fails on warnings, with tolerance 0.001."""
        return cls(
            validation_type=ValidationType.FULL,
            fail_on_warning=True,
            tolerance=0.001,
        )

    @classmethod
    def change_detection_only(cls) -> "ValidationConfig":
        """Build a config limited to the change-detection and fingerprint validators."""
        selected = {
            "change_detection_accuracy",
            "schema_change",
            "staleness",
            "fingerprint_consistency",
            "fingerprint_sensitivity",
        }
        return cls(
            validation_type=ValidationType.CHANGE_ONLY,
            enabled_validators=selected,
        )
1283
+
1284
+
1285
+ # =============================================================================
1286
+ # Validation Runner
1287
+ # =============================================================================
1288
+
1289
+
1290
class ValidationRunner:
    """Runs validation checks.

    Orchestrates validators and collects results.

    Example:
        runner = ValidationRunner(ValidationConfig.strict())
        result = runner.run(context)
        if not result.passed:
            print(result.to_markdown())
    """

    def __init__(
        self,
        config: "ValidationConfig | None" = None,
        registry: "ValidatorRegistry | None" = None,
    ):
        """Initialize runner.

        Args:
            config: Validation configuration; a default ``ValidationConfig``
                is used when omitted
            registry: Validator registry; the global ``validator_registry``
                is used when omitted
        """
        self.config = config or ValidationConfig()
        self.registry = registry or validator_registry

    def run(self, context: "ValidationContext") -> "ValidationResult":
        """Run validation.

        Every selected, enabled validator is executed against ``context``.
        A validator that raises is recorded as one failed check carrying a
        CRITICAL issue; it is counted in ``total_checks`` and listed in the
        per-category details like any other validator that ran.

        Args:
            context: Validation context; its ``config`` attribute is set to
                this runner's config

        Returns:
            Validation result
        """
        start_time = time.perf_counter()
        context.config = self.config

        all_issues: list[ValidationIssue] = []
        metrics = ValidationMetrics()
        details: dict[ValidationCategory, dict[str, Any]] = {}
        failing = (ValidationSeverity.ERROR, ValidationSeverity.CRITICAL)

        for validator in self._get_validators():
            if not validator.enabled:
                metrics.skipped_checks += 1
                continue

            # Counted up front so a crashing validator is part of the total
            # and failed_checks can never exceed total_checks.
            metrics.total_checks += 1
            try:
                issues = list(validator.validate(context))
            except Exception as e:
                logger.exception("Validator %s failed: %s", validator.name, e)
                issues = [
                    ValidationIssue(
                        category=validator.category,
                        severity=ValidationSeverity.CRITICAL,
                        message=f"Validator {validator.name} raised exception: {e}",
                    )
                ]

            all_issues.extend(issues)

            # Warnings and infos alone do not fail a check
            if any(issue.severity in failing for issue in issues):
                metrics.failed_checks += 1
            else:
                metrics.passed_checks += 1

            # Track details per category
            category_details = details.setdefault(
                validator.category, {"validators_run": []}
            )
            category_details["validators_run"].append(validator.name)

            # In stop-early mode a CRITICAL issue (including one produced by
            # a crashing validator) ends the run.
            if not self.config.collect_all_issues and any(
                issue.severity == ValidationSeverity.CRITICAL for issue in issues
            ):
                break

        # Calculate additional metrics
        metrics.duration_ms = (time.perf_counter() - start_time) * 1000
        metrics.columns_validated = len(context.profiled_columns | context.skipped_columns)
        metrics.changes_detected = len(context.profiled_columns)

        # Count false positives/negatives
        for issue in all_issues:
            if issue.metadata.get("false_positive"):
                metrics.false_positives += 1
            if issue.metadata.get("false_negative"):
                metrics.false_negatives += 1

        return ValidationResult(
            passed=self._determine_passed(all_issues),
            validation_type=self.config.validation_type,
            issues=all_issues,
            metrics=metrics,
            config={"tolerance": self.config.tolerance},
            details=details,
        )

    def _get_validators(self) -> "list[BaseValidator]":
        """Get validators to run based on config.

        A falsy ``enabled_validators`` (None or empty) selects every
        registered validator; names in ``disabled_validators`` are then
        removed.
        """
        enabled_names = self.config.enabled_validators
        disabled_names = self.config.disabled_validators

        return [
            validator
            for validator in self.registry.get_all()
            if (not enabled_names or validator.name in enabled_names)
            and validator.name not in disabled_names
        ]

    def _determine_passed(self, issues: "list[ValidationIssue]") -> bool:
        """Determine if validation passed.

        CRITICAL issues always fail; ERROR and WARNING issues fail only
        when the corresponding ``fail_on_*`` flag is set.
        """
        fatal = [ValidationSeverity.CRITICAL]
        if self.config.fail_on_error:
            fatal.append(ValidationSeverity.ERROR)
        if self.config.fail_on_warning:
            fatal.append(ValidationSeverity.WARNING)

        return not any(issue.severity in fatal for issue in issues)
1433
+
1434
+
1435
+ # =============================================================================
1436
+ # Main Validator Class
1437
+ # =============================================================================
1438
+
1439
+
1440
class IncrementalValidator:
    """Main validator for incremental profiling.

    Provides a high-level interface for validation.

    ``validate`` and ``validate_full`` run through the runner built from the
    configuration passed to the constructor. ``validate_change_detection``
    and ``validate_merge`` build their own purpose-specific configuration and
    runner and do not consult ``self.config``.

    Example:
        validator = IncrementalValidator()

        # Validate change detection
        result = validator.validate_change_detection(
            data=df,
            original_profile=profile1,
            incremental_profile=profile2,
        )

        # Validate profile merge
        result = validator.validate_merge(
            profiles=[profile1, profile2],
            merged_profile=merged,
        )

        # Full validation with profiling
        result = validator.validate_full(
            data=df,
            original_profile=profile1,
        )
    """

    def __init__(
        self,
        config: ValidationConfig | None = None,
    ) -> None:
        """Initialize validator.

        Args:
            config: Validation configuration. A default ``ValidationConfig()``
                is created when omitted.
        """
        self.config = config or ValidationConfig()
        self.runner = ValidationRunner(self.config)

    def validate(
        self,
        data: pl.LazyFrame | pl.DataFrame,
        *,
        original_profile: TableProfile | None = None,
        incremental_profile: TableProfile | None = None,
        full_profile: TableProfile | None = None,
        profiled_columns: set[str] | None = None,
        skipped_columns: set[str] | None = None,
        change_reasons: dict[str, ChangeReason] | None = None,
    ) -> ValidationResult:
        """Validate incremental profiling results.

        Args:
            data: Data that was profiled
            original_profile: Previous profile
            incremental_profile: Incremental profile to validate
            full_profile: Full profile for comparison
            profiled_columns: Columns that were re-profiled
                (an empty set is used when omitted)
            skipped_columns: Columns that were skipped
                (an empty set is used when omitted)
            change_reasons: Reasons for re-profiling
                (an empty dict is used when omitted)

        Returns:
            Validation result
        """
        context = ValidationContext(
            data=data,
            original_profile=original_profile,
            incremental_profile=incremental_profile,
            full_profile=full_profile,
            profiled_columns=profiled_columns or set(),
            skipped_columns=skipped_columns or set(),
            change_reasons=change_reasons or {},
        )

        return self.runner.run(context)

    def validate_change_detection(
        self,
        data: pl.LazyFrame | pl.DataFrame,
        original_profile: TableProfile,
        incremental_profile: TableProfile,
        *,
        full_profile: TableProfile | None = None,
    ) -> ValidationResult:
        """Validate change detection specifically.

        Focuses on accuracy of change detection. Uses
        ``ValidationConfig.change_detection_only()`` with a dedicated runner
        instead of the configuration held by this instance.

        Args:
            data: Current data
            original_profile: Previous profile
            incremental_profile: New incremental profile
            full_profile: Optional full profile for comparison

        Returns:
            Validation result
        """
        config = ValidationConfig.change_detection_only()
        runner = ValidationRunner(config)

        context = ValidationContext(
            data=data,
            original_profile=original_profile,
            incremental_profile=incremental_profile,
            full_profile=full_profile,
        )

        return runner.run(context)

    def validate_merge(
        self,
        profiles: list[TableProfile],
        merged_profile: TableProfile,
    ) -> ValidationResult:
        """Validate profile merge.

        Builds a merge-only configuration that enables just the
        ``"profile_merge"`` validator; the configuration held by this
        instance is not used.

        Args:
            profiles: Input profiles
            merged_profile: Merged output profile

        Returns:
            Validation result
        """
        config = ValidationConfig(
            validation_type=ValidationType.MERGE_ONLY,
            enabled_validators={"profile_merge"},
        )
        runner = ValidationRunner(config)

        # Create context with merge data
        context = ValidationContext(
            data=pl.DataFrame(),  # Empty, not needed for merge validation
        )
        # The merge inputs/output are attached dynamically rather than passed
        # to the constructor. NOTE(review): presumably the "profile_merge"
        # validator reads these attributes back from the context - confirm.
        setattr(context, 'merge_inputs', profiles)
        setattr(context, 'merge_output', merged_profile)

        return runner.run(context)

    def validate_full(
        self,
        data: pl.LazyFrame | pl.DataFrame,
        original_profile: TableProfile,
        *,
        incremental_config: IncrementalConfig | None = None,
    ) -> ValidationResult:
        """Full validation with actual profiling.

        Performs incremental profiling, full profiling, and compares results.
        Per-column fingerprints are computed on a best-effort basis: a column
        whose fingerprint cannot be calculated is logged and left out of
        ``current_fingerprints`` instead of aborting the validation.

        Args:
            data: Data to profile
            original_profile: Previous profile
            incremental_config: Incremental profiling configuration

        Returns:
            Validation result
        """
        import logging

        # Perform incremental profiling
        inc_profiler = IncrementalProfiler(config=incremental_config)
        inc_profile = inc_profiler.profile(data, previous=original_profile)

        # Perform full profiling for comparison.
        # NOTE(review): imported at call time, presumably to avoid a circular
        # import at module load - confirm before hoisting.
        from truthound.profiler.table_profiler import DataProfiler
        full_profiler = DataProfiler()

        lf = data.lazy() if isinstance(data, pl.DataFrame) else data
        full_profile = full_profiler.profile(lf)

        # Calculate fingerprints
        fp_calculator = FingerprintCalculator()
        current_fps: dict[str, ColumnFingerprint] = {}

        schema = lf.collect_schema()
        for col_name in schema.names():
            try:
                current_fps[col_name] = fp_calculator.calculate(lf, col_name)
            except Exception:
                # Still best effort, but no longer silent: record which column
                # was dropped (with traceback) so a missing fingerprint can be
                # diagnosed from the logs.
                logging.getLogger(__name__).warning(
                    "Fingerprint calculation failed for column %r; "
                    "skipping it during validation",
                    col_name,
                    exc_info=True,
                )

        context = ValidationContext(
            data=data,
            original_profile=original_profile,
            incremental_profile=inc_profile,
            full_profile=full_profile,
            current_fingerprints=current_fps,
            profiled_columns=inc_profiler.last_profiled_columns,
            skipped_columns=inc_profiler.last_skipped_columns,
            change_reasons=inc_profiler.last_change_reasons,
        )

        return self.runner.run(context)
1632
+
1633
+
1634
+ # =============================================================================
1635
+ # Convenience Functions
1636
+ # =============================================================================
1637
+
1638
+
1639
def validate_incremental(
    data: pl.LazyFrame | pl.DataFrame,
    original_profile: TableProfile,
    incremental_profile: TableProfile,
    *,
    full_profile: TableProfile | None = None,
    strict: bool = False,
) -> ValidationResult:
    """Validate an incremental profile in a single call.

    Builds an ``IncrementalValidator`` and forwards the profiles to its
    ``validate`` method.

    Args:
        data: Data that was profiled
        original_profile: Previous profile
        incremental_profile: New incremental profile
        full_profile: Optional full profile for comparison
        strict: When true, ``ValidationConfig.strict()`` is used; otherwise
            a default ``ValidationConfig()``

    Returns:
        Validation result
    """
    if strict:
        chosen_config = ValidationConfig.strict()
    else:
        chosen_config = ValidationConfig()

    return IncrementalValidator(chosen_config).validate(
        data=data,
        original_profile=original_profile,
        incremental_profile=incremental_profile,
        full_profile=full_profile,
    )
1668
+
1669
+
1670
def validate_merge(
    profiles: list[TableProfile],
    merged_profile: TableProfile,
) -> ValidationResult:
    """Validate a profile merge in a single call.

    Delegates to ``IncrementalValidator.validate_merge`` on a validator
    created with default settings.

    Args:
        profiles: Input profiles
        merged_profile: Merged output

    Returns:
        Validation result
    """
    return IncrementalValidator().validate_merge(
        profiles=profiles,
        merged_profile=merged_profile,
    )
1685
+
1686
+
1687
def validate_fingerprints(
    data: pl.LazyFrame | pl.DataFrame,
    fingerprints: dict[str, ColumnFingerprint],
) -> ValidationResult:
    """Validate fingerprint consistency.

    Runs a dedicated runner whose configuration enables only the
    ``"fingerprint_consistency"`` validator.

    Args:
        data: Data to check fingerprints against
        fingerprints: Fingerprints to validate

    Returns:
        Validation result
    """
    fingerprint_runner = ValidationRunner(
        ValidationConfig(enabled_validators={"fingerprint_consistency"})
    )
    return fingerprint_runner.run(
        ValidationContext(data=data, current_fingerprints=fingerprints)
    )