truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877)
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,970 @@
1
+ """DAG-based Validator Orchestration System.
2
+
3
+ This module provides a dependency-aware execution framework for validators
4
+ with support for parallel execution, caching, and multiple execution strategies.
5
+
6
+ Key Features:
7
+ - Dependency-based topological ordering
8
+ - Parallel execution of independent validators
9
+ - Result caching for dependent validators
10
+ - Multiple execution strategies (Sequential, Parallel, Adaptive)
11
+ - Execution metrics and profiling
12
+
13
+ Usage:
14
+ from truthound.validators.optimization.orchestrator import (
15
+ ValidatorDAG,
16
+ ExecutionPlan,
17
+ ParallelExecutionStrategy,
18
+ )
19
+
20
+ # Build DAG from validators
21
+ dag = ValidatorDAG()
22
+ dag.add_validators(validators)
23
+
24
+ # Create execution plan
25
+ plan = dag.build_execution_plan()
26
+
27
+ # Execute with parallel strategy
28
+ strategy = ParallelExecutionStrategy(max_workers=4)
29
+ results = plan.execute(lf, strategy)
30
+
31
+ Architecture:
32
+ ValidatorDAG
33
+
34
+ ├── ValidatorNode (wraps Validator with metadata)
35
+ │ ├── dependencies: set[str]
36
+ │ ├── provides: set[str]
37
+ │ └── priority: int
38
+
39
+ ├── build_execution_plan() -> ExecutionPlan
40
+ │ └── Topological sort into execution levels
41
+
42
+ └── ExecutionPlan
43
+ ├── levels: list[ExecutionLevel]
44
+ │ └── validators in same level can run in parallel
45
+
46
+ └── execute(lf, strategy) -> ExecutionResult
47
+ ├── SequentialExecutionStrategy
48
+ ├── ParallelExecutionStrategy
49
+ └── AdaptiveExecutionStrategy
50
+ """
51
+
52
+ from __future__ import annotations
53
+
54
+ import time
55
+ import logging
56
+ from abc import ABC, abstractmethod
57
+ from concurrent.futures import ThreadPoolExecutor, as_completed
58
+ from dataclasses import dataclass, field
59
+ from enum import Enum, auto
60
+ from typing import Any, Callable, TypeVar, Generic, Iterator
61
+
62
+ import polars as pl
63
+
64
+ from truthound.validators.base import (
65
+ Validator,
66
+ ValidationIssue,
67
+ ValidatorExecutionResult,
68
+ ValidationResult,
69
+ ErrorContext,
70
+ _validate_safe,
71
+ )
72
+ from truthound.validators.optimization.graph import TopologicalSort
73
+
74
+
75
+ logger = logging.getLogger("truthound.orchestrator")
76
+
77
+
78
+ # ============================================================================
79
+ # Validator Categories for Dependency Resolution
80
+ # ============================================================================
81
+
82
class ValidatorPhase(Enum):
    """Coarse stages used to order validator execution.

    Members are listed from the earliest stage to the latest, and their
    integer values increase in that same order. The intended contract is
    that every validator of an earlier phase finishes before a later phase
    starts, while validators sharing a phase may run concurrently unless
    they declare explicit dependencies on each other.
    """

    # Values are the consecutive integers starting at 1, in declaration order.
    SCHEMA = 1        # Schema validation (column existence, types)
    COMPLETENESS = 2  # Null checks, missing values
    UNIQUENESS = 3    # Duplicate detection, key validation
    FORMAT = 4        # Pattern matching, format validation
    RANGE = 5         # Value range, distribution checks
    STATISTICAL = 6   # Aggregate statistics, outliers
    CROSS_TABLE = 7   # Multi-table validation
    CUSTOM = 8        # User-defined validators
97
+
98
+
99
+ # Default phase mapping for built-in validator categories
100
# Built-in validator categories grouped by the phase they run in. Groups are
# listed in phase order so the derived mapping keeps a stable key order.
_PHASE_CATEGORIES: tuple[tuple[ValidatorPhase, tuple[str, ...]], ...] = (
    (ValidatorPhase.SCHEMA, ("schema",)),
    (ValidatorPhase.COMPLETENESS, ("completeness",)),
    (ValidatorPhase.UNIQUENESS, ("uniqueness",)),
    (ValidatorPhase.FORMAT, ("string", "datetime")),
    (ValidatorPhase.RANGE, ("distribution",)),
    (ValidatorPhase.STATISTICAL, ("aggregate", "anomaly")),
    (ValidatorPhase.CROSS_TABLE, ("cross_table", "referential")),
    (ValidatorPhase.CUSTOM, ("general",)),
)

# Lookup from a validator's category name to its default execution phase.
CATEGORY_TO_PHASE: dict[str, ValidatorPhase] = {
    category: phase
    for phase, categories in _PHASE_CATEGORIES
    for category in categories
}
113
+
114
+
115
+ # ============================================================================
116
+ # Validator Node (Wrapper with Dependency Metadata)
117
+ # ============================================================================
118
+
119
@dataclass
class ValidatorNode:
    """Wrapper for a Validator carrying dependency and scheduling metadata.

    Identity (equality and hashing) is based solely on ``node_id``, so two
    nodes wrapping different validators but sharing an id are considered
    the same node.

    Attributes:
        validator: The actual Validator instance.
        node_id: Unique identifier; defaults to ``validator.name`` when empty.
        dependencies: Set of node_ids this validator depends on.
        provides: Set of capabilities this validator provides; defaults to
            ``{node_id}`` when empty.
        phase: Execution phase for ordering. When left at ``CUSTOM`` it is
            derived from the validator's ``category`` if that category has
            an entry in ``CATEGORY_TO_PHASE``.
        priority: Priority within phase (lower = earlier).
        estimated_cost: Estimated execution cost (for adaptive scheduling).
    """

    validator: Validator
    node_id: str = ""
    dependencies: set[str] = field(default_factory=set)
    provides: set[str] = field(default_factory=set)
    phase: ValidatorPhase = ValidatorPhase.CUSTOM
    priority: int = 100
    estimated_cost: float = 1.0

    def __post_init__(self) -> None:
        """Fill in defaults that depend on the wrapped validator."""
        if not self.node_id:
            self.node_id = self.validator.name

        # Auto-detect phase from category. An explicitly chosen non-CUSTOM
        # phase is never overridden.
        category = getattr(self.validator, "category", "general")
        if self.phase == ValidatorPhase.CUSTOM and category in CATEGORY_TO_PHASE:
            self.phase = CATEGORY_TO_PHASE[category]

        # Auto-populate provides if not set: a node provides at least itself.
        if not self.provides:
            self.provides = {self.node_id}

    def __hash__(self) -> int:
        """Hash on ``node_id`` only, matching ``__eq__``."""
        return hash(self.node_id)

    def __eq__(self, other: object) -> bool:
        """Compare nodes by ``node_id``.

        Returns ``NotImplemented`` for non-``ValidatorNode`` operands so that
        Python can try the reflected comparison; plain ``==`` against an
        unrelated object still evaluates to ``False``.
        """
        if not isinstance(other, ValidatorNode):
            return NotImplemented
        return self.node_id == other.node_id
160
+
161
+
162
+ # ============================================================================
163
+ # Execution Level (Group of Parallel-Safe Validators)
164
+ # ============================================================================
165
+
166
@dataclass
class ExecutionLevel:
    """One batch of validator nodes intended to be run side by side.

    The nodes of a level are expected to be independent of one another and
    to depend only on nodes placed in earlier levels. The level behaves like
    a sized iterable over its nodes.
    """

    level_index: int
    nodes: list[ValidatorNode]
    phase: ValidatorPhase

    def __len__(self) -> int:
        """Number of nodes in this level."""
        return len(self.nodes)

    def __iter__(self) -> Iterator[ValidatorNode]:
        """Iterate over the nodes in their stored order."""
        return iter(self.nodes)

    @property
    def size(self) -> int:
        """Number of nodes in this level (same as ``len(level)``)."""
        return len(self.nodes)

    @property
    def node_ids(self) -> list[str]:
        """Identifiers of the nodes, in stored order."""
        return [member.node_id for member in self.nodes]
190
+
191
+
192
+ # ============================================================================
193
+ # Execution Result
194
+ # ============================================================================
195
+
196
@dataclass
class NodeExecutionResult:
    """Outcome of running one validator node, with its timing.

    ``start_time`` and ``end_time`` are timestamps in seconds; ``issues`` and
    ``status`` are read straight from the wrapped ``result``.
    """

    node_id: str
    result: ValidatorExecutionResult
    start_time: float
    end_time: float

    @property
    def status(self) -> ValidationResult:
        """Status reported by the wrapped execution result."""
        return self.result.status

    @property
    def issues(self) -> list[ValidationIssue]:
        """Issues reported by the wrapped execution result."""
        return self.result.issues

    @property
    def duration_ms(self) -> float:
        """Elapsed time between start and end, in milliseconds."""
        elapsed_seconds = self.end_time - self.start_time
        return elapsed_seconds * 1000
215
+
216
+
217
@dataclass
class LevelExecutionResult:
    """Aggregated outcome of running every node of one execution level.

    ``start_time`` and ``end_time`` are timestamps in seconds that bracket
    the whole level.
    """

    level_index: int
    node_results: list[NodeExecutionResult]
    start_time: float
    end_time: float

    @property
    def duration_ms(self) -> float:
        """Elapsed time for the level, in milliseconds."""
        elapsed_seconds = self.end_time - self.start_time
        return elapsed_seconds * 1000

    @property
    def all_issues(self) -> list[ValidationIssue]:
        """Issues of all node results, flattened in node-result order."""
        return [
            issue
            for outcome in self.node_results
            for issue in outcome.issues
        ]

    @property
    def success_count(self) -> int:
        """Number of node results whose status is ``SUCCESS``."""
        succeeded = [
            outcome
            for outcome in self.node_results
            if outcome.status == ValidationResult.SUCCESS
        ]
        return len(succeeded)

    @property
    def failure_count(self) -> int:
        """Number of node results whose status is ``FAILED``."""
        failed = [
            outcome
            for outcome in self.node_results
            if outcome.status == ValidationResult.FAILED
        ]
        return len(failed)
243
+
244
+
245
@dataclass
class ExecutionResult:
    """Complete result of executing the entire DAG.

    Attributes:
        level_results: Per-level results, in the order the levels were run.
        total_start_time: Timestamp (seconds) taken before the first level.
        total_end_time: Timestamp (seconds) taken after the last level.
        strategy_name: Name of the execution strategy that produced this.
    """

    level_results: list[LevelExecutionResult]
    total_start_time: float
    total_end_time: float
    strategy_name: str

    @property
    def total_duration_ms(self) -> float:
        """Wall-clock time for the whole run, in milliseconds."""
        return (self.total_end_time - self.total_start_time) * 1000

    @property
    def all_issues(self) -> list[ValidationIssue]:
        """Issues from every level, flattened in level order."""
        issues: list[ValidationIssue] = []
        for level_result in self.level_results:
            issues.extend(level_result.all_issues)
        return issues

    @property
    def node_results(self) -> list[NodeExecutionResult]:
        """Node results from every level, flattened in level order."""
        results: list[NodeExecutionResult] = []
        for level_result in self.level_results:
            results.extend(level_result.node_results)
        return results

    @property
    def total_validators(self) -> int:
        """Total number of node results across all levels."""
        return sum(len(lr.node_results) for lr in self.level_results)

    @property
    def success_count(self) -> int:
        """Sum of the per-level success counts."""
        return sum(lr.success_count for lr in self.level_results)

    @property
    def failure_count(self) -> int:
        """Sum of the per-level failure counts."""
        return sum(lr.failure_count for lr in self.level_results)

    def get_metrics(self) -> dict[str, Any]:
        """Get execution metrics summary.

        Returns:
            A dict with the keys ``total_duration_ms``, ``total_validators``,
            ``total_issues``, ``success_count``, ``failure_count``,
            ``levels``, ``strategy`` and ``parallelism_factor``.
        """
        return {
            "total_duration_ms": self.total_duration_ms,
            "total_validators": self.total_validators,
            "total_issues": len(self.all_issues),
            "success_count": self.success_count,
            "failure_count": self.failure_count,
            "levels": len(self.level_results),
            "strategy": self.strategy_name,
            "parallelism_factor": self._compute_parallelism_factor(),
        }

    def _compute_parallelism_factor(self) -> float:
        """Compute how much parallelism was achieved.

        The factor is the sum of the individual node durations divided by
        the wall-clock duration of the run. It falls back to the neutral
        value ``1.0`` when there is nothing to measure: no node results, a
        zero summed node time, or a non-positive wall-clock duration.
        """
        results = self.node_results
        if not results:
            return 1.0

        sequential_time = sum(r.duration_ms for r in results)
        wall_time = self.total_duration_ms
        # A wall time of 0 (identical timestamps on a coarse clock) or below
        # (system clock stepped backwards) would otherwise raise
        # ZeroDivisionError or yield a meaningless negative factor.
        if sequential_time == 0 or wall_time <= 0:
            return 1.0

        return sequential_time / wall_time
306
+
307
+
308
+ # ============================================================================
309
+ # Execution Strategies
310
+ # ============================================================================
311
+
312
class ExecutionStrategy(ABC):
    """Interface every level-execution strategy implements.

    Concrete strategies override ``name`` with a short identifier and
    provide ``execute_level``.
    """

    name: str = "base"

    @abstractmethod
    def execute_level(
        self,
        level: ExecutionLevel,
        lf: pl.LazyFrame,
        context: ExecutionContext,
    ) -> LevelExecutionResult:
        """Run every validator of ``level`` against ``lf``.

        Args:
            level: The group of validator nodes to execute.
            lf: The frame handed to each validator.
            context: Shared execution state and error-handling flags.

        Returns:
            The aggregated result for the level.
        """
326
+
327
+
328
@dataclass
class ExecutionContext:
    """Mutable state shared by the strategies while a plan runs.

    Attributes:
        previous_results: Node results recorded so far, keyed by node id.
        cached_data: Free-form storage; not touched by this class itself.
        skip_on_error: Flag forwarded to the validator runner.
        log_errors: Flag forwarded to the validator runner.
    """

    previous_results: dict[str, NodeExecutionResult] = field(default_factory=dict)
    cached_data: dict[str, Any] = field(default_factory=dict)
    skip_on_error: bool = True
    log_errors: bool = True

    def add_result(self, result: NodeExecutionResult) -> None:
        """Record ``result`` under its node id, replacing any earlier entry."""
        key = result.node_id
        self.previous_results[key] = result

    def get_result(self, node_id: str) -> NodeExecutionResult | None:
        """Return the recorded result for ``node_id``, or ``None`` if absent."""
        recorded = self.previous_results
        return recorded.get(node_id, None)
341
+
342
+
343
class SequentialExecutionStrategy(ExecutionStrategy):
    """Run the validators of a level one after another, in level order.

    The simplest strategy; handy for debugging and for environments where
    spawning worker threads is undesirable.
    """

    name = "sequential"

    def execute_level(
        self,
        level: ExecutionLevel,
        lf: pl.LazyFrame,
        context: ExecutionContext,
    ) -> LevelExecutionResult:
        """Execute each node of ``level`` in turn and collect the results.

        Every node result is also recorded on ``context`` as soon as it is
        produced. How validator errors are treated is delegated to
        ``_validate_safe`` via the context's ``skip_on_error`` and
        ``log_errors`` flags.
        """
        started_at = time.time()
        collected: list[NodeExecutionResult] = []

        for current in level:
            began = time.time()
            outcome = _validate_safe(
                current.validator,
                lf,
                skip_on_error=context.skip_on_error,
                log_errors=context.log_errors,
            )
            finished = time.time()

            record = NodeExecutionResult(
                node_id=current.node_id,
                result=outcome,
                start_time=began,
                end_time=finished,
            )
            collected.append(record)
            context.add_result(record)

        finished_at = time.time()
        return LevelExecutionResult(
            level_index=level.level_index,
            node_results=collected,
            start_time=started_at,
            end_time=finished_at,
        )
385
+
386
+
387
class ParallelExecutionStrategy(ExecutionStrategy):
    """Execute validators in parallel using ThreadPoolExecutor.

    Intended for I/O-bound validators or when using Polars' streaming mode.
    Results are always returned in the level's node order, regardless of
    which worker finishes first, so output is deterministic and matches the
    ordering produced by the sequential strategy.
    """

    name = "parallel"

    def __init__(self, max_workers: int | None = None):
        """Initialize parallel strategy.

        Args:
            max_workers: Maximum number of worker threads, passed straight
                to ``ThreadPoolExecutor``. ``None`` selects the executor's
                own default (``min(32, cpu_count + 4)``).
        """
        self.max_workers = max_workers

    def execute_level(
        self,
        level: ExecutionLevel,
        lf: pl.LazyFrame,
        context: ExecutionContext,
    ) -> LevelExecutionResult:
        """Run all nodes of ``level`` and return their results in node order.

        Levels with at most one node run inline on the calling thread. Each
        node result is recorded on ``context`` from the calling thread.
        """
        level_start = time.time()
        nodes = list(level)

        if len(nodes) <= 1:
            # For a single validator there is no need for a thread pool.
            node_results: list[NodeExecutionResult] = []
            for node in nodes:
                node_result = self._execute_node(
                    node,
                    lf,
                    context.skip_on_error,
                    context.log_errors,
                )
                node_results.append(node_result)
                context.add_result(node_result)
        else:
            node_results = self._execute_concurrently(nodes, lf, context)

        return LevelExecutionResult(
            level_index=level.level_index,
            node_results=node_results,
            start_time=level_start,
            end_time=time.time(),
        )

    def _execute_concurrently(
        self,
        nodes: list[ValidatorNode],
        lf: pl.LazyFrame,
        context: ExecutionContext,
    ) -> list[NodeExecutionResult]:
        """Run ``nodes`` on a thread pool; results come back in input order."""
        # Slots are filled by submission index so completion order is irrelevant.
        ordered: list[NodeExecutionResult | None] = [None] * len(nodes)

        with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
            future_to_index = {
                executor.submit(
                    self._execute_node,
                    node,
                    lf,
                    context.skip_on_error,
                    context.log_errors,
                ): index
                for index, node in enumerate(nodes)
            }

            for future in as_completed(future_to_index):
                index = future_to_index[future]
                node = nodes[index]
                try:
                    node_result = future.result()
                except Exception as e:
                    # Boundary handler: any exception escaping a worker is
                    # converted into a FAILED result rather than aborting the
                    # level. NOTE(review): this means errors never propagate
                    # here even if skip_on_error is False — confirm intended.
                    logger.error("Error executing %s: %s", node.node_id, e)
                    failed_at = time.time()
                    node_result = NodeExecutionResult(
                        node_id=node.node_id,
                        result=ValidatorExecutionResult(
                            validator_name=node.validator.name,
                            status=ValidationResult.FAILED,
                            issues=[],
                            error_message=str(e),
                            error_context=ErrorContext(type(e).__name__, str(e)),
                        ),
                        start_time=failed_at,
                        end_time=failed_at,
                    )
                ordered[index] = node_result
                context.add_result(node_result)

        return [result for result in ordered if result is not None]

    def _execute_node(
        self,
        node: ValidatorNode,
        lf: pl.LazyFrame,
        skip_on_error: bool,
        log_errors: bool,
    ) -> NodeExecutionResult:
        """Execute a single node (for thread pool) and time it."""
        start = time.time()
        result = _validate_safe(
            node.validator,
            lf,
            skip_on_error=skip_on_error,
            log_errors=log_errors,
        )
        end = time.time()

        return NodeExecutionResult(
            node_id=node.node_id,
            result=result,
            start_time=start,
            end_time=end,
        )
501
+
502
+
503
class AdaptiveExecutionStrategy(ExecutionStrategy):
    """Pick sequential or parallel execution per level.

    The decision is based solely on the number of validators in the level:
    levels with at least ``parallel_threshold`` validators are handed to the
    parallel strategy, smaller levels to the sequential one.
    """

    name = "adaptive"

    def __init__(
        self,
        parallel_threshold: int = 3,
        max_workers: int | None = None,
    ):
        """Set up both delegate strategies.

        Args:
            parallel_threshold: Minimum validators in level to use parallel
            max_workers: Maximum workers for parallel execution
        """
        self.parallel_threshold = parallel_threshold
        self.sequential = SequentialExecutionStrategy()
        self.parallel = ParallelExecutionStrategy(max_workers=max_workers)

    def execute_level(
        self,
        level: ExecutionLevel,
        lf: pl.LazyFrame,
        context: ExecutionContext,
    ) -> LevelExecutionResult:
        """Delegate the level to the strategy selected by its size.

        Args:
            level: Level to execute; its ``len()`` drives the choice.
            lf: LazyFrame to validate.
            context: Execution context passed through to the delegate.

        Returns:
            The LevelExecutionResult produced by the chosen delegate.
        """
        go_parallel = len(level) >= self.parallel_threshold
        delegate = self.parallel if go_parallel else self.sequential
        return delegate.execute_level(level, lf, context)
539
+
540
+
541
+ # ============================================================================
542
+ # Execution Plan
543
+ # ============================================================================
544
+
545
@dataclass
class ExecutionPlan:
    """Ordered, executable arrangement of validators.

    A plan is a list of levels. Validators inside one level may be run in
    parallel by the chosen strategy; the levels themselves are executed one
    after another, in list order.
    """

    # Levels in execution order.
    levels: list[ExecutionLevel]
    # Number of validator nodes covered by the plan.
    total_nodes: int
    # True when the plan was built via the cycle fallback path.
    has_cycles: bool = False
    # Text of the cycle error, when one was detected.
    cycle_info: str | None = None

    def execute(
        self,
        lf: pl.LazyFrame,
        strategy: ExecutionStrategy | None = None,
        skip_on_error: bool = True,
        log_errors: bool = True,
    ) -> ExecutionResult:
        """Run every level of the plan in order.

        Args:
            lf: LazyFrame to validate
            strategy: Execution strategy (default: AdaptiveExecutionStrategy)
            skip_on_error: Continue on validator errors
            log_errors: Log validation errors

        Returns:
            ExecutionResult with all validation results
        """
        chosen = AdaptiveExecutionStrategy() if strategy is None else strategy

        started_at = time.time()
        run_context = ExecutionContext(
            skip_on_error=skip_on_error,
            log_errors=log_errors,
        )

        # Levels run strictly one after another, sharing a single context.
        per_level: list[LevelExecutionResult] = [
            chosen.execute_level(level, lf, run_context) for level in self.levels
        ]

        return ExecutionResult(
            level_results=per_level,
            total_start_time=started_at,
            total_end_time=time.time(),
            strategy_name=chosen.name,
        )

    def get_summary(self) -> dict[str, Any]:
        """Return a plain-dict overview of the plan and each of its levels."""
        level_summaries = []
        for level in self.levels:
            level_summaries.append(
                {
                    "index": level.level_index,
                    "size": level.size,
                    "phase": level.phase.name,
                    "nodes": level.node_ids,
                }
            )

        return {
            "total_nodes": self.total_nodes,
            "total_levels": len(self.levels),
            "has_cycles": self.has_cycles,
            "levels": level_summaries,
        }

    def __repr__(self) -> str:
        level_count = len(self.levels)
        return (
            f"ExecutionPlan(nodes={self.total_nodes}, "
            f"levels={level_count}, has_cycles={self.has_cycles})"
        )
618
+
619
+
620
+ # ============================================================================
621
+ # Validator DAG
622
+ # ============================================================================
623
+
624
class ValidatorDAG:
    """Directed Acyclic Graph for validator dependency management.

    Builds an execution plan from a set of validators based on:
    1. Explicit dependencies (validator.dependencies)
    2. Phase ordering (schema -> completeness -> uniqueness -> ...)
    3. Priority within phase

    Nodes are keyed by ``validator.name``; adding a second validator with the
    same name replaces the first one (a warning is logged).

    Example:
        dag = ValidatorDAG()

        # Add validators with automatic dependency detection
        dag.add_validator(NullValidator())
        dag.add_validator(DuplicateValidator())

        # Add with explicit dependencies
        dag.add_validator(
            RangeValidator(),
            dependencies={"null"},  # Must run after NullValidator
        )

        # Build and execute plan
        plan = dag.build_execution_plan()
        result = plan.execute(lf)
    """

    def __init__(self):
        self.nodes: dict[str, ValidatorNode] = {}
        self._dependency_graph: dict[str, set[str]] = {}

    def add_validator(
        self,
        validator: Validator,
        dependencies: set[str] | None = None,
        provides: set[str] | None = None,
        phase: ValidatorPhase | None = None,
        priority: int = 100,
        estimated_cost: float = 1.0,
    ) -> ValidatorNode:
        """Add a validator to the DAG.

        Args:
            validator: Validator instance
            dependencies: Set of node_ids this depends on. When omitted, the
                validator's own ``dependencies`` attribute is used if present.
            provides: Set of capabilities this provides (defaults to the
                node's own id)
            phase: Execution phase override. When omitted, the phase is
                looked up from the validator's ``category`` attribute.
            priority: Priority within phase (lower = earlier)
            estimated_cost: Estimated execution cost

        Returns:
            The created ValidatorNode
        """
        node_id = validator.name

        # Fall back to dependencies declared on the validator itself
        if dependencies is None:
            dependencies = getattr(validator, "dependencies", set())
        if dependencies is None:
            dependencies = set()

        # Auto-detect phase from category
        if phase is None:
            category = getattr(validator, "category", "general")
            phase = CATEGORY_TO_PHASE.get(category, ValidatorPhase.CUSTOM)

        node = ValidatorNode(
            validator=validator,
            node_id=node_id,
            # Copy so later add_dependency() calls never mutate the caller's set
            dependencies=set(dependencies),
            provides=provides or {node_id},
            phase=phase,
            priority=priority,
            estimated_cost=estimated_cost,
        )

        # Node ids are validator names, so a second validator with the same
        # name replaces the first. Make that visible instead of silent.
        if node_id in self.nodes:
            logger.warning(
                f"Validator node '{node_id}' already exists in the DAG; replacing it"
            )

        self.nodes[node_id] = node
        return node

    def add_validators(
        self,
        validators: list[Validator],
    ) -> list[ValidatorNode]:
        """Add multiple validators.

        Args:
            validators: List of Validator instances

        Returns:
            List of created ValidatorNodes
        """
        return [self.add_validator(v) for v in validators]

    def add_dependency(self, from_id: str, to_id: str) -> None:
        """Add a dependency edge.

        The call is a no-op when ``from_id`` is not a known node. ``to_id``
        is recorded even if no such node exists yet; unknown dependencies are
        ignored when the plan is built.

        Args:
            from_id: Node that depends
            to_id: Node that is depended upon
        """
        if from_id in self.nodes:
            self.nodes[from_id].dependencies.add(to_id)

    def build_execution_plan(self) -> ExecutionPlan:
        """Build an execution plan from the DAG.

        If the combined explicit + phase dependency graph contains a cycle,
        a warning is logged and the plan is built from a phase/priority
        ordering instead, with ``has_cycles=True`` and the error text stored
        in ``cycle_info``.

        Returns:
            ExecutionPlan with validators organized into levels
        """
        if not self.nodes:
            return ExecutionPlan(levels=[], total_nodes=0)

        # Build full dependency graph including phase dependencies
        adjacency = self._build_adjacency_with_phases()

        # Check for cycles
        try:
            sorter = TopologicalSort(adjacency)
            sorted_ids = sorter.sort()
        except ValueError as e:
            logger.warning(f"Cycle detected in validator dependencies: {e}")
            # Fallback to phase-only ordering
            sorted_ids = self._sort_by_phase_only()
            return ExecutionPlan(
                levels=self._group_into_levels(sorted_ids),
                total_nodes=len(self.nodes),
                has_cycles=True,
                cycle_info=str(e),
            )

        # Group into execution levels
        levels = self._group_into_levels(sorted_ids)

        return ExecutionPlan(
            levels=levels,
            total_nodes=len(self.nodes),
        )

    def _build_adjacency_with_phases(self) -> dict[str, list[str]]:
        """Build adjacency list including implicit phase dependencies.

        The result maps each node id to the ids that must run after it.
        """
        adjacency: dict[str, list[str]] = {node_id: [] for node_id in self.nodes}
        # Parallel set view of each adjacency list for O(1) duplicate checks;
        # the lists themselves keep insertion order.
        seen_edges: dict[str, set[str]] = {node_id: set() for node_id in self.nodes}

        # Add explicit dependencies (edges point from dependency to dependent)
        for node_id, node in self.nodes.items():
            for dep in node.dependencies:
                if dep in self.nodes:
                    adjacency[dep].append(node_id)
                    seen_edges[dep].add(node_id)

        # Add implicit phase dependencies
        # Validators in later phases depend on validators in earlier phases
        phase_to_nodes: dict[ValidatorPhase, list[str]] = {}
        for node_id, node in self.nodes.items():
            phase_to_nodes.setdefault(node.phase, []).append(node_id)

        # Sort phases by value
        sorted_phases = sorted(phase_to_nodes, key=lambda p: p.value)

        # Add edges from each phase to the next phase that is present
        for current_phase, next_phase in zip(sorted_phases, sorted_phases[1:]):
            # Each node in next phase depends on all nodes in current phase
            for current_node in phase_to_nodes[current_phase]:
                for next_node in phase_to_nodes[next_phase]:
                    if next_node not in seen_edges[current_node]:
                        adjacency[current_node].append(next_node)
                        seen_edges[current_node].add(next_node)

        return adjacency

    def _sort_by_phase_only(self) -> list[str]:
        """Fallback sort using only phases (ignores explicit dependencies)."""
        sorted_nodes = sorted(
            self.nodes.values(),
            key=lambda n: (n.phase.value, n.priority, n.node_id),
        )
        return [n.node_id for n in sorted_nodes]

    def _group_into_levels(self, sorted_ids: list[str]) -> list[ExecutionLevel]:
        """Group sorted node IDs into execution levels.

        Nodes with no dependencies on each other can be in the same level.
        A level only ever contains nodes of a single phase: the phase of the
        first ready node found in the remaining order.
        """
        if not sorted_ids:
            return []

        levels: list[ExecutionLevel] = []
        assigned: set[str] = set()
        remaining = list(sorted_ids)

        while remaining:
            # Find all nodes whose dependencies are already assigned
            current_level_nodes: list[ValidatorNode] = []
            current_phase = None

            for node_id in remaining:
                node = self.nodes[node_id]
                # Dependencies on unknown nodes are treated as satisfied
                deps_satisfied = all(
                    dep in assigned or dep not in self.nodes
                    for dep in node.dependencies
                )
                if not deps_satisfied:
                    continue

                # Check phase compatibility - only group same phase
                if current_phase is None:
                    current_phase = node.phase

                if node.phase == current_phase:
                    current_level_nodes.append(node)

            if not current_level_nodes:
                # Shouldn't happen if graph is acyclic, but handle gracefully
                logger.warning("Could not find nodes for next level")
                # Take the first remaining node so the loop always progresses
                forced = self.nodes[remaining[0]]
                current_level_nodes = [forced]
                current_phase = forced.phase

            # Sort within level by priority
            current_level_nodes.sort(key=lambda n: (n.priority, n.node_id))

            # Create level
            levels.append(
                ExecutionLevel(
                    level_index=len(levels),
                    nodes=current_level_nodes,
                    phase=current_phase or ValidatorPhase.CUSTOM,
                )
            )

            # Mark as assigned; filter once instead of list.remove() per node
            placed = {node.node_id for node in current_level_nodes}
            assigned |= placed
            remaining = [nid for nid in remaining if nid not in placed]

        return levels

    def get_dependency_chain(self, node_id: str) -> list[str]:
        """Get the full dependency chain for a node.

        Only explicit dependencies are followed; ids that are not nodes of
        this DAG are skipped. Sibling dependencies are visited in sorted
        order so the result is the same on every run.

        Args:
            node_id: Node to get dependencies for

        Returns:
            List of node_ids in dependency order (dependencies first, the
            requested node last); empty if ``node_id`` is unknown
        """
        if node_id not in self.nodes:
            return []

        visited: set[str] = set()
        chain: list[str] = []

        def visit(nid: str) -> None:
            if nid in visited or nid not in self.nodes:
                return
            visited.add(nid)

            # Sets of str iterate in hash order, which varies between
            # interpreter runs; sort for a reproducible chain.
            for dep in sorted(self.nodes[nid].dependencies):
                visit(dep)

            chain.append(nid)

        visit(node_id)
        return chain

    def visualize(self) -> str:
        """Create ASCII visualization of the DAG.

        Returns:
            ASCII art representation of the DAG, grouped by phase
        """
        if not self.nodes:
            return "Empty DAG"

        lines = ["ValidatorDAG:"]

        # Group by phase
        phase_to_nodes: dict[ValidatorPhase, list[ValidatorNode]] = {}
        for node in self.nodes.values():
            phase_to_nodes.setdefault(node.phase, []).append(node)

        for phase in sorted(phase_to_nodes, key=lambda p: p.value):
            lines.append(f"\n [{phase.name}]")
            nodes = sorted(phase_to_nodes[phase], key=lambda n: n.priority)

            for node in nodes:
                deps = ", ".join(sorted(node.dependencies)) if node.dependencies else "none"
                lines.append(f" - {node.node_id} (deps: {deps})")

        return "\n".join(lines)

    def __repr__(self) -> str:
        return f"ValidatorDAG(nodes={len(self.nodes)})"

    def __len__(self) -> int:
        return len(self.nodes)
922
+
923
+
924
+ # ============================================================================
925
+ # Convenience Functions
926
+ # ============================================================================
927
+
928
def create_execution_plan(
    validators: list[Validator],
    dependencies: dict[str, set[str]] | None = None,
) -> ExecutionPlan:
    """Build an execution plan for the given validators.

    Each validator is registered in a fresh ValidatorDAG. When
    ``dependencies`` has an entry for a validator's name, that entry is
    passed as the validator's explicit dependencies; otherwise ``None`` is
    passed and the DAG applies its own defaults.

    Args:
        validators: List of validators
        dependencies: Optional explicit dependencies {validator_name: {dep_names}}

    Returns:
        ExecutionPlan ready for execution
    """
    explicit = dependencies if dependencies else {}
    dag = ValidatorDAG()

    for validator in validators:
        name = validator.name
        deps = explicit[name] if name in explicit else None
        dag.add_validator(validator, dependencies=deps)

    return dag.build_execution_plan()
950
+
951
+
952
def execute_validators(
    validators: list[Validator],
    lf: pl.LazyFrame,
    strategy: ExecutionStrategy | None = None,
    dependencies: dict[str, set[str]] | None = None,
) -> ExecutionResult:
    """Plan and run validators in one call, using DAG-based ordering.

    Args:
        validators: List of validators
        lf: LazyFrame to validate
        strategy: Execution strategy (default: AdaptiveExecutionStrategy)
        dependencies: Optional explicit dependencies

    Returns:
        ExecutionResult with all validation results
    """
    return create_execution_plan(validators, dependencies).execute(lf, strategy)