truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877)
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,807 @@
1
+ """Testing utilities for custom validators.
2
+
3
+ This module provides a comprehensive testing framework for validators:
4
+ - ValidatorTestCase: Base class for validator unit tests
5
+ - Test data generators and fixtures
6
+ - Assertion helpers for validation results
7
+ - Performance benchmarking utilities
8
+
9
+ Example:
10
+ class TestMyValidator(ValidatorTestCase):
11
+ validator_class = MyValidator
12
+
13
+ def test_detects_violations(self):
14
+ df = self.create_df({"col1": [1, -1, 2, -2]})
15
+ result = self.validate(df)
16
+ self.assert_has_issue("col1", "negative_value", 2)
17
+
18
+ def test_no_issues_for_valid_data(self):
19
+ df = self.create_df({"col1": [1, 2, 3]})
20
+ result = self.validate(df)
21
+ self.assert_no_issues()
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import time
27
+ from dataclasses import dataclass, field
28
+ from typing import Any, Callable, TypeVar
29
+ from unittest import TestCase
30
+
31
+ import polars as pl
32
+
33
+ from truthound.validators.base import (
34
+ Validator,
35
+ ValidationIssue,
36
+ ValidatorConfig,
37
+ ValidatorExecutionResult,
38
+ )
39
+ from truthound.types import Severity
40
+
41
+
42
+ T = TypeVar("T", bound=Validator)
43
+
44
+
45
@dataclass
class ValidatorTestResult:
    """Outcome captured from a single validator run inside a test.

    Attributes:
        issues: Validation issues reported by the validator.
        execution_time_ms: Duration of the run in milliseconds.
        error: Exception raised during validation, or None.
        passed: Whether the test passed (based on expectations).
    """

    issues: list[ValidationIssue] = field(default_factory=list)
    execution_time_ms: float = 0.0
    error: Exception | None = None
    passed: bool = True

    @property
    def issue_count(self) -> int:
        """Number of issues recorded."""
        return len(self.issues)

    @property
    def total_violations(self) -> int:
        """Violation counts of all recorded issues added together."""
        total = 0
        for issue in self.issues:
            total += issue.count
        return total

    def get_issues_for_column(self, column: str) -> list[ValidationIssue]:
        """Return the issues whose ``column`` equals *column*."""
        return list(filter(lambda issue: issue.column == column, self.issues))

    def get_issues_by_type(self, issue_type: str) -> list[ValidationIssue]:
        """Return the issues whose ``issue_type`` equals *issue_type*."""
        return list(
            filter(lambda issue: issue.issue_type == issue_type, self.issues)
        )

    def has_issue(
        self,
        column: str | None = None,
        issue_type: str | None = None,
        min_count: int | None = None,
    ) -> bool:
        """Report whether at least one issue satisfies every given filter.

        A filter that is falsy (None, empty string, 0) is not applied.

        Args:
            column: Column the issue must belong to.
            issue_type: Issue type the issue must have.
            min_count: Smallest acceptable violation count.

        Returns:
            True if some issue passes all active filters, False otherwise.
        """
        return any(
            (not column or issue.column == column)
            and (not issue_type or issue.issue_type == issue_type)
            and (not min_count or not issue.count < min_count)
            for issue in self.issues
        )
95
+
96
+
97
class ValidatorTestCase(TestCase):
    """Base class for validator unit tests.

    Provides convenient methods for testing validators:
    - create_df: Create test DataFrames
    - validate: Run validation and capture results
    - assert_*: Various assertion helpers

    Example:
        class TestNullValidator(ValidatorTestCase):
            validator_class = NullValidator

            def test_finds_nulls(self):
                df = self.create_df({
                    "name": ["Alice", None, "Bob", None],
                    "age": [25, 30, None, 35],
                })
                self.validate(df)
                self.assert_has_issue("name", "null_value", min_count=2)
                self.assert_has_issue("age", "null_value", min_count=1)

            def test_no_issues_when_no_nulls(self):
                df = self.create_df({
                    "name": ["Alice", "Bob"],
                    "age": [25, 30],
                })
                self.validate(df)
                self.assert_no_issues()
    """

    validator_class: type[Validator] | None = None
    default_config: ValidatorConfig | None = None

    # Class-level defaults so the helpers still work (and ``last_result``
    # still raises its descriptive AssertionError) when a subclass overrides
    # setUp() without calling super().setUp().
    _last_result: ValidatorTestResult | None = None
    _validator: Validator | None = None

    def setUp(self) -> None:
        """Reset the cached validator and the last captured result."""
        super().setUp()
        self._last_result: ValidatorTestResult | None = None
        self._validator: Validator | None = None

    def create_validator(
        self,
        config: ValidatorConfig | None = None,
        **kwargs: Any,
    ) -> Validator:
        """Create a validator instance and cache it on the test case.

        Args:
            config: Validator configuration; falls back to ``default_config``
                when falsy.
            **kwargs: Additional options forwarded to the validator class.

        Returns:
            The newly created validator instance.

        Raises:
            ValueError: If validator_class is not set.
        """
        if self.validator_class is None:
            raise ValueError(
                "validator_class must be set on the test class, "
                "or override create_validator()"
            )

        effective_config = config or self.default_config
        self._validator = self.validator_class(effective_config, **kwargs)
        return self._validator

    def create_df(self, data: dict[str, list[Any]]) -> pl.LazyFrame:
        """Create a test LazyFrame from a dictionary.

        Args:
            data: Dictionary mapping column names to values.

        Returns:
            LazyFrame for testing.
        """
        return pl.LazyFrame(data)

    def create_large_df(
        self,
        rows: int = 1_000_000,
        schema: dict[str, type] | None = None,
        seed: int = 42,
    ) -> pl.LazyFrame:
        """Create a large test LazyFrame for performance testing.

        ``int`` columns hold ``0..rows-1``, ``float`` columns hold seeded
        pseudo-random values in ``[0, 100)``, ``str`` columns hold
        ``"value_<i>"`` and any other type yields an all-null column.

        Args:
            rows: Number of rows.
            schema: Column name to type mapping (defaults to id/value/name).
            seed: Random seed for reproducibility.

        Returns:
            Large LazyFrame for testing.
        """
        import random

        # A private generator gives the same sequence as seeding the module
        # RNG, without resetting global random state for the rest of the
        # test process.
        rng = random.Random(seed)

        if schema is None:
            schema = {
                "id": int,
                "value": float,
                "name": str,
            }

        data: dict[str, list[Any]] = {}

        for col, dtype in schema.items():
            if dtype is int:
                data[col] = list(range(rows))
            elif dtype is float:
                data[col] = [rng.random() * 100 for _ in range(rows)]
            elif dtype is str:
                data[col] = [f"value_{i}" for i in range(rows)]
            else:
                data[col] = [None] * rows

        return pl.LazyFrame(data)

    def validate(
        self,
        lf: pl.LazyFrame,
        validator: Validator | None = None,
        **kwargs: Any,
    ) -> ValidatorTestResult:
        """Run validation and store the result.

        Any exception raised by the validator is captured on the result
        (see ``assert_error`` / ``assert_no_error``) instead of propagating.

        Args:
            lf: LazyFrame to validate.
            validator: Validator to use. When None, the cached validator is
                reused, or a new one is created if none exists yet.
            **kwargs: Options used only when a new validator has to be
                created; ignored if a validator is already cached.

        Returns:
            ValidatorTestResult with issues, timing and any captured error.
        """
        if validator is None:
            if self._validator is None:
                self.create_validator(**kwargs)
            validator = self._validator

        error: Exception | None = None
        issues: list[ValidationIssue] = []

        # perf_counter is monotonic and high resolution, unlike time.time(),
        # which can jump when the system clock is adjusted.
        started = time.perf_counter()
        try:
            issues = validator.validate(lf)  # type: ignore
        except Exception as exc:
            # Deliberately broad: the error is recorded for later assertions.
            error = exc
        execution_time = (time.perf_counter() - started) * 1000

        self._last_result = ValidatorTestResult(
            issues=issues,
            execution_time_ms=execution_time,
            error=error,
        )

        return self._last_result

    def validate_safe(
        self,
        lf: pl.LazyFrame,
        validator: Validator | None = None,
        **kwargs: Any,
    ) -> ValidatorExecutionResult:
        """Run the validator's own ``validate_safe`` and return its result.

        Does not update ``last_result``.

        Args:
            lf: LazyFrame to validate.
            validator: Validator to use (cached or newly created if None).
            **kwargs: Options used only when a new validator is created.

        Returns:
            ValidatorExecutionResult with full status.
        """
        if validator is None:
            if self._validator is None:
                self.create_validator(**kwargs)
            validator = self._validator

        return validator.validate_safe(lf)  # type: ignore

    @property
    def last_result(self) -> ValidatorTestResult:
        """Get the last validation result.

        Raises:
            AssertionError: If no validation has been run.
        """
        if self._last_result is None:
            raise AssertionError("No validation has been run. Call validate() first.")
        return self._last_result

    def assert_no_issues(self) -> None:
        """Assert that the last validation produced no issues."""
        result = self.last_result
        if result.issues:
            issue_summary = "\n".join(
                f" - {i.column}: {i.issue_type} ({i.count})" for i in result.issues
            )
            self.fail(
                f"Expected no issues, but found {len(result.issues)}:\n{issue_summary}"
            )

    def assert_has_issue(
        self,
        column: str | None = None,
        issue_type: str | None = None,
        min_count: int | None = None,
        exact_count: int | None = None,
        severity: Severity | None = None,
    ) -> None:
        """Assert that a matching issue exists.

        Every argument that is not None must match; None means "any".

        Args:
            column: Expected column (None = any).
            issue_type: Expected issue type (None = any).
            min_count: Minimum violation count.
            exact_count: Exact violation count.
            severity: Expected severity level.
        """
        result = self.last_result

        # Compare against None explicitly so that legitimate falsy values
        # (exact_count=0, an empty column name, a falsy enum member) are
        # still enforced instead of being silently treated as "any".
        for issue in result.issues:
            if column is not None and issue.column != column:
                continue
            if issue_type is not None and issue.issue_type != issue_type:
                continue
            if severity is not None and issue.severity != severity:
                continue
            if min_count is not None and issue.count < min_count:
                continue
            if exact_count is not None and issue.count != exact_count:
                continue
            return  # Found matching issue

        # Build failure message
        criteria = []
        if column is not None:
            criteria.append(f"column='{column}'")
        if issue_type is not None:
            criteria.append(f"issue_type='{issue_type}'")
        if min_count is not None:
            criteria.append(f"min_count={min_count}")
        if exact_count is not None:
            criteria.append(f"exact_count={exact_count}")
        if severity is not None:
            criteria.append(f"severity={severity.value}")

        # Avoid the dangling "matching , but found" when no filter was given.
        criteria_text = ", ".join(criteria) or "(any issue)"

        found_summary = (
            "\n".join(
                f" - {i.column}: {i.issue_type} ({i.count}, {i.severity.value})"
                for i in result.issues
            )
            if result.issues
            else " (none)"
        )

        self.fail(
            f"Expected issue matching {criteria_text}, "
            f"but found:\n{found_summary}"
        )

    def assert_issue_count(self, expected: int) -> None:
        """Assert the total number of distinct issues.

        Args:
            expected: Expected number of issues.
        """
        result = self.last_result
        self.assertEqual(
            len(result.issues),
            expected,
            f"Expected {expected} issues, found {len(result.issues)}",
        )

    def assert_total_violations(self, expected: int) -> None:
        """Assert the sum of all violation counts.

        Args:
            expected: Expected total violations.
        """
        result = self.last_result
        total = sum(i.count for i in result.issues)
        self.assertEqual(
            total,
            expected,
            f"Expected {expected} total violations, found {total}",
        )

    def assert_no_error(self) -> None:
        """Assert that no error occurred during validation."""
        result = self.last_result
        if result.error is not None:
            self.fail(f"Expected no error, but got: {result.error}")

    def assert_error(self, error_type: type[Exception] | None = None) -> None:
        """Assert that an error occurred.

        Args:
            error_type: Expected error type (None = any error).
        """
        result = self.last_result
        if result.error is None:
            self.fail("Expected an error, but validation succeeded")
        if error_type is not None and not isinstance(result.error, error_type):
            self.fail(
                f"Expected {error_type.__name__}, "
                f"but got {type(result.error).__name__}: {result.error}"
            )

    def assert_performance(
        self,
        max_ms: float,
        rows: int | None = None,
    ) -> None:
        """Assert that validation completed within time limit.

        Args:
            max_ms: Maximum allowed milliseconds.
            rows: Row count for per-row calculation in the failure message
                (optional).
        """
        result = self.last_result
        if result.execution_time_ms > max_ms:
            per_row = ""
            if rows:
                per_row = f" ({result.execution_time_ms / rows:.4f} ms/row)"
            self.fail(
                f"Validation took {result.execution_time_ms:.2f}ms, "
                f"expected <= {max_ms}ms{per_row}"
            )
427
+
428
+
429
+ # ============================================================================
430
+ # Test Data Generators
431
+ # ============================================================================
432
+
433
+
434
def create_test_dataframe(
    data: dict[str, list[Any]] | None = None,
    rows: int = 100,
    columns: list[str] | None = None,
    include_nulls: bool = False,
    null_probability: float = 0.1,
    seed: int = 42,
) -> pl.LazyFrame:
    """Create a test LazyFrame with configurable properties.

    Recognised column names get typed content: ``"id"`` is ``0..rows-1``,
    ``"name"`` is ``"item_<i>"``, ``"value"`` is a seeded pseudo-random float
    in ``[0, 100)`` and ``"date"`` is consecutive days starting 2024-01-01.
    Any other column name yields ``"<col>_<i>"`` strings.

    Args:
        data: Explicit data (if provided, other args ignored).
        rows: Number of rows to generate.
        columns: Column names to generate (defaults to id, name, value, date).
        include_nulls: Whether to replace some values with nulls.
        null_probability: Per-value probability of becoming null when
            ``include_nulls`` is set.
        seed: Random seed.

    Returns:
        Test LazyFrame.
    """
    if data is not None:
        return pl.LazyFrame(data)

    import random
    from datetime import date, timedelta

    # Use a private generator: it produces the same sequence as seeding the
    # module-level RNG but leaves the caller's global random state untouched.
    rng = random.Random(seed)

    if columns is None:
        columns = ["id", "name", "value", "date"]

    generated_data: dict[str, list[Any]] = {}

    for col in columns:
        if col == "id":
            values: list[Any] = list(range(rows))
        elif col == "name":
            values = [f"item_{i}" for i in range(rows)]
        elif col == "value":
            values = [rng.random() * 100 for _ in range(rows)]
        elif col == "date":
            base = date(2024, 1, 1)
            values = [base + timedelta(days=i) for i in range(rows)]
        else:
            values = [f"{col}_{i}" for i in range(rows)]

        if include_nulls:
            # One draw per value, in order, so output stays reproducible.
            values = [
                None if rng.random() < null_probability else v for v in values
            ]

        generated_data[col] = values

    return pl.LazyFrame(generated_data)
490
+
491
+
492
def create_edge_case_data() -> dict[str, pl.LazyFrame]:
    """Build a named collection of edge-case LazyFrames.

    Every frame has a single column called ``"col"``.

    Returns:
        Dictionary mapping case names to LazyFrames.
    """
    # Case name -> values of the single "col" column, in the order the
    # cases are exposed to callers.
    column_values: dict[str, list[Any]] = {
        # No rows at all
        "empty": [],
        # Exactly one row
        "single_row": [1],
        # Only null values
        "all_nulls": [None, None, None],
        # The same value repeated
        "uniform_values": [1, 1, 1, 1, 1],
        # Very large integers
        "large_values": [10**15, 10**16, 10**17],
        # Very small floats
        "small_values": [1e-15, 1e-16, 1e-17],
        # Non-ASCII text
        "unicode": ["한글", "日本語", "中文", "العربية", "🎉"],
        # Mostly empty strings
        "empty_strings": ["", "", "value", ""],
        # Whitespace-only strings
        "whitespace": [" ", "\t", "\n", "value"],
        # Infinities, NaN and signed zero
        "special_floats": [float("inf"), float("-inf"), float("nan"), 0.0, -0.0],
    }

    return {
        case_name: pl.LazyFrame({"col": values})
        for case_name, values in column_values.items()
    }
543
+
544
+
545
+ # ============================================================================
546
+ # Assertion Helpers
547
+ # ============================================================================
548
+
549
+
550
def assert_no_issues(issues: list[ValidationIssue]) -> None:
    """Fail unless the given issue list is empty.

    Args:
        issues: Validation issues produced by a validator run

    Raises:
        AssertionError: If at least one issue is present; the message lists
            each issue's column, issue type and count.
    """
    if not issues:
        return

    # One line per offending issue so the failure message is self-explanatory.
    lines = [
        f" - {item.column}: {item.issue_type} ({item.count})" for item in issues
    ]
    summary = "\n".join(lines)
    raise AssertionError(f"Expected no issues, found {len(issues)}:\n{summary}")
564
+
565
+
566
def assert_has_issue(
    issues: list[ValidationIssue],
    column: str | None = None,
    issue_type: str | None = None,
    min_count: int = 1,
) -> ValidationIssue:
    """Assert that a matching issue exists and return it.

    The first issue (in list order) that satisfies every supplied criterion
    is returned.

    Args:
        issues: List of validation issues
        column: Expected column (None = any)
        issue_type: Expected issue type (None = any)
        min_count: Minimum violation count

    Returns:
        The matching issue

    Raises:
        AssertionError: If no matching issue found; the message lists the
            criteria that were applied and the issues that were present.
    """
    # Only None disables a filter. An explicit empty string is a real
    # criterion and must not silently match every issue.
    for issue in issues:
        if column is not None and issue.column != column:
            continue
        if issue_type is not None and issue.issue_type != issue_type:
            continue
        if issue.count < min_count:
            continue
        return issue

    # Describe exactly the filters that were active, for the failure message.
    criteria = []
    if column is not None:
        criteria.append(f"column='{column}'")
    if issue_type is not None:
        criteria.append(f"issue_type='{issue_type}'")
    criteria.append(f"min_count={min_count}")

    found = (
        "\n".join(f" - {i.column}: {i.issue_type} ({i.count})" for i in issues)
        if issues
        else " (none)"
    )

    raise AssertionError(
        f"Expected issue matching {', '.join(criteria)}, found:\n{found}"
    )
611
+
612
+
613
def assert_issue_count(issues: list[ValidationIssue], expected: int) -> None:
    """Check that exactly ``expected`` issues were reported.

    Args:
        issues: Validation issues produced by a validator run
        expected: Number of issues the list must contain

    Raises:
        AssertionError: If the list length differs from ``expected``
    """
    actual = len(issues)
    if actual == expected:
        return
    raise AssertionError(f"Expected {expected} issues, found {actual}")
627
+
628
+
629
+ # ============================================================================
630
+ # Performance Testing
631
+ # ============================================================================
632
+
633
+
634
@dataclass
class BenchmarkResult:
    """Timing statistics collected from one validator benchmark.

    Attributes:
        validator_name: Name of the validator
        row_count: Number of rows tested
        iterations: Number of iterations run
        mean_ms: Mean execution time in ms
        min_ms: Minimum execution time
        max_ms: Maximum execution time
        std_ms: Standard deviation
        throughput_rows_per_sec: Rows processed per second
    """

    validator_name: str
    row_count: int
    iterations: int
    mean_ms: float
    min_ms: float
    max_ms: float
    std_ms: float
    throughput_rows_per_sec: float

    def to_dict(self) -> dict[str, Any]:
        """Return the result as a plain dictionary with rounded numbers.

        Timing fields are rounded to two decimals and the throughput to zero
        decimals; identifying fields are copied unchanged under shorter keys.
        """
        summary: dict[str, Any] = {
            "validator": self.validator_name,
            "rows": self.row_count,
            "iterations": self.iterations,
        }
        # The timing keys are identical to the attribute names.
        for timing_field in ("mean_ms", "min_ms", "max_ms", "std_ms"):
            summary[timing_field] = round(getattr(self, timing_field), 2)
        summary["throughput"] = round(self.throughput_rows_per_sec, 0)
        return summary
669
+
670
+
671
def benchmark_validator(
    validator: Validator,
    lf: pl.LazyFrame,
    iterations: int = 10,
    warmup: int = 2,
) -> BenchmarkResult:
    """Benchmark a validator's performance.

    The validator is run ``warmup`` times untimed, then ``iterations`` times
    with each call to ``validator.validate(lf)`` timed individually.

    Args:
        validator: Validator to benchmark
        lf: LazyFrame to validate
        iterations: Number of timed iterations
        warmup: Number of warmup iterations (not timed)

    Returns:
        BenchmarkResult with timing statistics

    Raises:
        statistics.StatisticsError: If ``iterations`` is less than 1, because
            no timings are collected to average.
    """
    import statistics

    # Get row count
    row_count = lf.select(pl.len()).collect().item()

    # Warmup
    for _ in range(warmup):
        validator.validate(lf)

    # Timed iterations. perf_counter is a monotonic, high-resolution clock;
    # time.time() can jump with system clock adjustments and may be too
    # coarse to measure short runs.
    times: list[float] = []
    for _ in range(iterations):
        start = time.perf_counter()
        validator.validate(lf)
        times.append((time.perf_counter() - start) * 1000)

    mean_ms = statistics.mean(times)
    # Guard against a zero mean so the throughput division cannot fail.
    throughput = (row_count / mean_ms) * 1000 if mean_ms > 0 else 0.0

    return BenchmarkResult(
        validator_name=validator.name,
        row_count=row_count,
        iterations=iterations,
        mean_ms=mean_ms,
        min_ms=min(times),
        max_ms=max(times),
        # stdev needs at least two samples.
        std_ms=statistics.stdev(times) if len(times) > 1 else 0.0,
        throughput_rows_per_sec=throughput,
    )
717
+
718
+
719
class ValidatorBenchmark:
    """Collect validators, benchmark them over several data sizes, and report.

    Example:
        benchmark = ValidatorBenchmark()
        benchmark.add_validator(NullValidator())
        benchmark.add_validator(UniqueValidator())

        results = benchmark.run(row_counts=[1000, 10000, 100000])
        benchmark.print_report()
    """

    def __init__(self) -> None:
        """Start with no registered validators and no results."""
        self._validators: list[Validator] = []
        self._results: list[BenchmarkResult] = []

    def add_validator(self, validator: Validator) -> "ValidatorBenchmark":
        """Register a validator for subsequent benchmark runs.

        Args:
            validator: Validator instance

        Returns:
            Self for chaining
        """
        self._validators.append(validator)
        return self

    def run(
        self,
        row_counts: list[int] | None = None,
        iterations: int = 10,
        data_generator: Callable[[int], pl.LazyFrame] | None = None,
    ) -> list[BenchmarkResult]:
        """Benchmark every registered validator at every requested size.

        Results from any previous run are discarded. One dataset is generated
        per row count and shared by all validators at that size.

        Args:
            row_counts: List of row counts to test (defaults to
                1000, 10000 and 100000)
            iterations: Iterations per benchmark
            data_generator: Function to generate test data for a given row
                count (defaults to ``create_test_dataframe``)

        Returns:
            List of benchmark results, grouped by row count in the order given
        """
        sizes = [1000, 10000, 100000] if row_counts is None else row_counts

        if data_generator is None:

            def data_generator(n: int) -> pl.LazyFrame:
                return create_test_dataframe(rows=n)

        self._results = []

        for size in sizes:
            frame = data_generator(size)
            # extend() consumes the generator lazily, so results are stored
            # one at a time as each benchmark completes.
            self._results.extend(
                benchmark_validator(validator, frame, iterations)
                for validator in self._validators
            )

        return self._results

    def print_report(self) -> None:
        """Print the stored results as a per-validator table on stdout."""
        if not self._results:
            print("No benchmark results. Run benchmarks first.")
            return

        # Bucket results by validator name, keeping first-seen order.
        grouped: dict[str, list[BenchmarkResult]] = {}
        for entry in self._results:
            grouped.setdefault(entry.validator_name, []).append(entry)

        banner = "=" * 70
        rule = "-" * 50

        print("\n" + banner)
        print("VALIDATOR BENCHMARK REPORT")
        print(banner)

        for validator_name, entries in grouped.items():
            print(f"\n{validator_name}:")
            print(rule)
            print(f"{'Rows':>12} {'Mean (ms)':>12} {'Throughput':>15}")
            print(rule)
            # Smallest dataset first within each validator's table.
            for entry in sorted(entries, key=lambda x: x.row_count):
                print(
                    f"{entry.row_count:>12,} {entry.mean_ms:>12.2f} "
                    f"{entry.throughput_rows_per_sec:>12,.0f}/s"
                )

        print("\n" + banner)