truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877):
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,753 @@
1
+ """Streaming ECDF and statistical test implementations.
2
+
3
+ This module provides memory-efficient implementations of statistical tests
4
+ that traditionally require loading two full datasets into memory.
5
+
6
+ Key Algorithms:
7
+ - StreamingECDF: Approximate ECDF using T-Digest
8
+ - StreamingKSTest: Streaming Kolmogorov-Smirnov test
9
+ - StreamingStatistics: Online mean, variance, quantiles
10
+
11
+ Memory Complexity:
12
+ - Traditional KS test: O(n + m) for n, m samples
13
+ - Streaming KS test: O(compression) constant memory
14
+
15
+ Usage:
16
+ class StreamingKSValidator(DriftValidator, StreamingECDFMixin):
17
+ def validate(self, lf):
18
+ # Build reference ECDF from stream
19
+ ref_ecdf = self.build_streaming_ecdf(reference_lf, column)
20
+
21
+ # Compute KS statistic vs current data stream
22
+ ks_stat = self.streaming_ks_statistic(ref_ecdf, current_lf, column)
23
+ """
24
+
25
+ from __future__ import annotations
26
+
27
+ from dataclasses import dataclass, field
28
+ from typing import Any, Iterator, TYPE_CHECKING
29
+ import heapq
30
+ import math
31
+
32
+ import numpy as np
33
+
34
+ if TYPE_CHECKING:
35
+ import polars as pl
36
+
37
+
38
@dataclass
class Centroid:
    """A weighted point in a T-Digest.

    Summarises a cluster of observed values by their mean and by how many
    values were folded into it. Ordering compares ``mean`` only, so a list
    of centroids can be sorted by position on the value axis.
    """

    mean: float
    count: int

    def __lt__(self, other: "Centroid") -> bool:
        return self.mean < other.mean


class TDigest:
    """T-Digest data structure for streaming quantile estimation.

    Values are first collected in a small buffer and then folded into a
    sorted list of :class:`Centroid` objects. Centroids are allowed to grow
    larger near the median than near the tails, so the tails keep finer
    resolution.

    Reference: Dunning & Ertl, "Computing Extremely Accurate Quantiles
    Using t-Digests" (2019)

    Example:
        digest = TDigest(compression=100)
        for batch in data_stream:
            digest.update(batch)
        median = digest.quantile(0.5)
        p99 = digest.quantile(0.99)
    """

    def __init__(self, compression: float = 100.0):
        """Initialize T-Digest.

        Args:
            compression: Compression factor (higher = more accurate, more memory)
                Typical values: 100-500
        """
        self.compression = compression
        self._centroids: list[Centroid] = []
        # Number of values already folded into ``_centroids`` (buffered
        # values are not included until the buffer is flushed).
        self._total_count = 0
        self._min = float("inf")
        self._max = float("-inf")
        self._buffer: list[float] = []
        self._buffer_size = int(compression * 2)

    def update(self, values: np.ndarray | list | float) -> None:
        """Update digest with new values.

        Non-finite entries (NaN, +/-inf) are dropped. Python numbers, NumPy
        scalars, arrays of any shape and plain iterables are accepted;
        everything is converted to ``float`` before it is buffered.

        Args:
            values: Single value, list, or numpy array
        """
        if isinstance(values, (int, float, np.generic)):
            # np.generic covers NumPy scalars such as np.float32 / np.int64,
            # which are not subclasses of the Python number types.
            arr = np.array([values], dtype=float)
        elif isinstance(values, np.ndarray):
            arr = values.astype(float, copy=False).ravel()
        else:
            # list() keeps generators and other one-shot iterables working.
            arr = np.asarray(list(values), dtype=float).ravel()

        finite = arr[np.isfinite(arr)]
        if finite.size == 0:
            return

        self._buffer.extend(finite.tolist())
        self._min = min(self._min, float(finite.min()))
        self._max = max(self._max, float(finite.max()))

        if len(self._buffer) >= self._buffer_size:
            self._flush_buffer()

    def _flush_buffer(self) -> None:
        """Merge buffer into centroids.

        Applies the same per-value rule as :meth:`_add_centroid` (try to
        merge into the first centroid whose mean is >= the value, otherwise
        insert a singleton in front of it). Because the buffer is sorted,
        the insertion point only ever moves right, so the whole buffer is
        folded in with one linear pass instead of a search, a prefix sum and
        a list insert per value.
        """
        if not self._buffer:
            return

        self._buffer.sort()

        # Stack of centroids not yet passed; the smallest mean is on top.
        pending = self._centroids[::-1]
        settled: list[Centroid] = []
        count_before = 0  # total count held by ``settled``

        for value in self._buffer:
            self._total_count += 1

            # Everything strictly left of ``value`` can no longer change.
            while pending and pending[-1].mean < value:
                passed = pending.pop()
                settled.append(passed)
                count_before += passed.count

            if pending:
                existing = pending[-1]
                merged_count = existing.count + 1
                q = self._quantile_at_count(count_before + merged_count / 2)
                if merged_count <= self._max_size_at_quantile(q):
                    new_mean = (
                        existing.mean * existing.count + value
                    ) / merged_count
                    pending[-1] = Centroid(mean=new_mean, count=merged_count)
                    continue

            # No neighbour with room: the value becomes its own centroid and
            # is the merge candidate for any equal values that follow.
            pending.append(Centroid(mean=value, count=1))

        settled.extend(reversed(pending))
        self._centroids = settled
        self._buffer = []

        # Compress if too many centroids
        if len(self._centroids) > 3 * self.compression:
            self._compress()

    def _add_centroid(self, new_centroid: Centroid) -> None:
        """Add a centroid, potentially merging with neighbors.

        The candidate for merging is the first existing centroid whose mean
        is >= the new mean; the merge happens only if the combined count
        stays within the size limit for that position.
        """
        self._total_count += new_centroid.count

        if not self._centroids:
            self._centroids.append(new_centroid)
            return

        # Find insertion point
        idx = self._find_insertion_point(new_centroid.mean)

        # Try to merge with the centroid at the insertion point
        if idx < len(self._centroids):
            existing = self._centroids[idx]
            merged_count = existing.count + new_centroid.count

            # Check if merge is allowed (size limit based on position)
            q = self._quantile_at_count(self._count_before(idx) + merged_count / 2)
            max_size = self._max_size_at_quantile(q)

            if merged_count <= max_size:
                new_mean = (
                    existing.mean * existing.count
                    + new_centroid.mean * new_centroid.count
                ) / merged_count
                self._centroids[idx] = Centroid(mean=new_mean, count=merged_count)
                return

        # Insert as new centroid
        self._centroids.insert(idx, new_centroid)

    def _find_insertion_point(self, value: float) -> int:
        """Binary search for the first centroid whose mean is >= ``value``."""
        lo, hi = 0, len(self._centroids)
        while lo < hi:
            mid = (lo + hi) // 2
            if self._centroids[mid].mean < value:
                lo = mid + 1
            else:
                hi = mid
        return lo

    def _count_before(self, idx: int) -> int:
        """Count of all values held by centroids before ``idx``."""
        return sum(c.count for c in self._centroids[:idx])

    def _quantile_at_count(self, count: float) -> float:
        """Convert a cumulative count into a quantile (0.5 when empty)."""
        if self._total_count == 0:
            return 0.5
        return count / self._total_count

    def _max_size_at_quantile(self, q: float) -> float:
        """Maximum centroid size at given quantile.

        The bound shrinks towards q=0 and q=1, which gives smaller
        centroids at the tails, and is never below 1.

        NOTE(review): for the scale function
        k(q) = δ/2 * (arcsin(2q-1)/π + 1/2) the derivative is
        δ / (2π * sqrt(q(1-q))). The expression below has π in the
        numerator instead, which makes the size limit π² times smaller
        (more, finer centroids). Left unchanged so existing accuracy and
        memory characteristics do not shift; confirm which was intended.
        """
        k_prime = (
            self.compression
            * math.pi
            / (2 * math.sqrt(q * (1 - q) + 1e-10))  # epsilon avoids /0 at q=0 or 1
        )
        return max(1, int(self._total_count / k_prime))

    def _compress(self) -> None:
        """Compress centroids to reduce memory.

        Greedily merges each centroid with its right-hand neighbours for as
        long as the merged count stays within the size limit at that
        position. Does nothing when there are at most ``compression``
        centroids.
        """
        if len(self._centroids) <= self.compression:
            return

        compacted: list[Centroid] = []
        emitted = 0  # total count already placed in ``compacted``
        total = len(self._centroids)
        i = 0

        while i < total:
            current = self._centroids[i]

            # Try to absorb following centroids
            while i + 1 < total:
                next_c = self._centroids[i + 1]
                merged_count = current.count + next_c.count

                q = self._quantile_at_count(emitted + merged_count / 2)
                if merged_count > self._max_size_at_quantile(q):
                    break

                new_mean = (
                    current.mean * current.count + next_c.mean * next_c.count
                ) / merged_count
                current = Centroid(mean=new_mean, count=merged_count)
                i += 1

            compacted.append(current)
            emitted += current.count
            i += 1

        self._centroids = compacted

    def quantile(self, q: float) -> float:
        """Get quantile value.

        Args:
            q: Quantile (0 to 1)

        Returns:
            Value at quantile; NaN for an empty digest, the observed
            minimum for ``q <= 0`` and the observed maximum for ``q >= 1``.
        """
        self._flush_buffer()

        if not self._centroids:
            return float("nan")

        if q <= 0:
            return self._min
        if q >= 1:
            return self._max

        target_count = q * self._total_count
        cumulative = 0
        last = len(self._centroids) - 1

        for i, centroid in enumerate(self._centroids):
            next_cumulative = cumulative + centroid.count

            if next_cumulative >= target_count:
                # A centroid is taken to span from the midpoint with its
                # left neighbour to the midpoint with its right neighbour
                # (or to the observed min/max at the ends).
                if i == 0:
                    left = self._min
                else:
                    left = (self._centroids[i - 1].mean + centroid.mean) / 2

                if i == last:
                    right = self._max
                else:
                    right = (centroid.mean + self._centroids[i + 1].mean) / 2

                # Linear interpolation inside the centroid
                frac = (target_count - cumulative) / centroid.count
                return left + frac * (right - left)

            cumulative = next_cumulative

        return self._max

    def cdf(self, value: float) -> float:
        """Get CDF value (proportion of values <= x).

        Half of each centroid's count is treated as lying below its mean.
        The estimate is linearly interpolated between consecutive centroid
        means, anchored at the observed minimum (mass 0) and maximum
        (full mass), so it is continuous and non-decreasing.

        Args:
            value: Value to query

        Returns:
            Estimated proportion of values <= value. An empty digest
            returns 0.5 (kept for backward compatibility).
        """
        self._flush_buffer()

        if not self._centroids:
            return 0.5
        # The upper bound is tested first so that a digest holding a single
        # distinct value reports 1.0 (not 0.0) at that value.
        if value >= self._max:
            return 1.0
        if value < self._min:
            return 0.0

        total = self._total_count
        prev_mean = self._min
        prev_mass = 0.0  # estimated count of values <= prev_mean
        cumulative = 0

        for centroid in self._centroids:
            mass_at_mean = cumulative + centroid.count / 2

            if value < centroid.mean:
                # prev_mean <= value < centroid.mean, so the span is > 0.
                frac = (value - prev_mean) / (centroid.mean - prev_mean)
                return (prev_mass + frac * (mass_at_mean - prev_mass)) / total

            prev_mean, prev_mass = centroid.mean, mass_at_mean
            cumulative += centroid.count

        # Last centroid mean <= value < max: interpolate up to the full mass.
        frac = (value - prev_mean) / (self._max - prev_mean)
        return (prev_mass + frac * (total - prev_mass)) / total

    @property
    def count(self) -> int:
        """Total count of values, including those still in the buffer."""
        return self._total_count + len(self._buffer)

    @property
    def mean(self) -> float:
        """Estimate mean from centroids (NaN when empty)."""
        self._flush_buffer()
        if self._total_count == 0:
            return float("nan")
        return sum(c.mean * c.count for c in self._centroids) / self._total_count

    def merge(self, other: "TDigest") -> "TDigest":
        """Merge with another T-Digest.

        Both inputs have their buffers flushed as a side effect; their
        centroids are otherwise left untouched.

        Args:
            other: Another T-Digest

        Returns:
            New merged T-Digest using the larger of the two compressions
        """
        result = TDigest(compression=max(self.compression, other.compression))

        # Flush both buffers
        self._flush_buffer()
        other._flush_buffer()

        # Feed all centroids in ascending order of mean
        all_centroids = self._centroids + other._centroids
        all_centroids.sort()

        for c in all_centroids:
            result._add_centroid(Centroid(mean=c.mean, count=c.count))

        result._min = min(self._min, other._min)
        result._max = max(self._max, other._max)
        result._compress()

        return result
355
+
356
+
357
class StreamingECDF:
    """Streaming Empirical CDF using T-Digest.

    Thin wrapper around a :class:`TDigest` that adds vectorised ``cdf`` and
    ``quantile`` queries.

    Example:
        ecdf = StreamingECDF(compression=200)
        for batch in data_stream:
            ecdf.update(batch)

        # Query CDF at specific points
        cdf_values = ecdf.cdf(query_points)

        # Get quantiles
        median = ecdf.quantile(0.5)
    """

    def __init__(self, compression: float = 200.0):
        """Initialize streaming ECDF.

        Args:
            compression: T-Digest compression factor
        """
        self._digest = TDigest(compression=compression)

    @staticmethod
    def _is_scalar(x: Any) -> bool:
        """Return True for Python numbers, NumPy scalars and 0-d arrays."""
        if isinstance(x, (int, float, np.generic)):
            return True
        return isinstance(x, np.ndarray) and x.ndim == 0

    def update(self, values: np.ndarray) -> None:
        """Update ECDF with new values.

        Args:
            values: Array of values
        """
        self._digest.update(values)

    def cdf(self, x: np.ndarray | float) -> np.ndarray | float:
        """Compute CDF at given points.

        Args:
            x: Query point, or an iterable of query points

        Returns:
            A single CDF value for scalar input (including NumPy scalars
            and 0-d arrays), otherwise an array with one value per point.
        """
        if self._is_scalar(x):
            return self._digest.cdf(float(x))

        return np.array([self._digest.cdf(xi) for xi in x])

    def quantile(self, q: float | np.ndarray) -> float | np.ndarray:
        """Compute quantile values.

        Args:
            q: Quantile, or an iterable of quantiles

        Returns:
            A single value for scalar input (including NumPy scalars and
            0-d arrays), otherwise an array with one value per quantile.
        """
        if self._is_scalar(q):
            return self._digest.quantile(float(q))

        return np.array([self._digest.quantile(qi) for qi in q])

    @property
    def count(self) -> int:
        """Number of values seen."""
        return self._digest.count

    @property
    def min(self) -> float:
        """Minimum value."""
        return self._digest._min

    @property
    def max(self) -> float:
        """Maximum value."""
        return self._digest._max

    def merge(self, other: "StreamingECDF") -> "StreamingECDF":
        """Merge with another ECDF.

        Args:
            other: Another StreamingECDF

        Returns:
            New StreamingECDF wrapping the merged digest
        """
        combined = self._digest.merge(other._digest)
        # Bypass __init__ so no throw-away digest is allocated.
        result = StreamingECDF.__new__(StreamingECDF)
        result._digest = combined
        return result
438
+
439
+
440
@dataclass
class StreamingStatistics:
    """Streaming statistics for distribution comparison.

    Tracks running statistics that can be used for statistical tests
    without storing the full dataset.

    Tracks:
    - Count, mean, variance (Welford's algorithm)
    - Min, max
    - Quantiles via T-Digest
    """

    count: int = 0
    mean: float = 0.0
    m2: float = 0.0  # Sum of squared differences from the running mean
    min_val: float = float("inf")
    max_val: float = float("-inf")
    _digest: TDigest = field(default_factory=lambda: TDigest(compression=100))

    @staticmethod
    def _finite_values(values: Any) -> np.ndarray:
        """Return the finite entries of ``values`` as a flat float array.

        Accepts arrays, lists and scalars; NaN and +/-inf are dropped.
        """
        arr = np.asarray(values, dtype=float).ravel()
        return arr[np.isfinite(arr)]

    def _record_extremes_and_digest(self, finite: np.ndarray) -> None:
        """Fold a non-empty batch into min/max and the quantile digest."""
        self.min_val = min(self.min_val, float(finite.min()))
        self.max_val = max(self.max_val, float(finite.max()))
        self._digest.update(finite)

    def update(self, values: np.ndarray) -> None:
        """Update statistics with new values, one value at a time.

        Args:
            values: Array of values (non-finite values are ignored)
        """
        finite = self._finite_values(values)
        if finite.size == 0:
            return

        # Welford's update; iterating plain floats keeps the running
        # statistics as Python floats.
        for x in finite.tolist():
            self.count += 1
            delta = x - self.mean
            self.mean += delta / self.count
            self.m2 += delta * (x - self.mean)

        self._record_extremes_and_digest(finite)

    def update_batch(self, values: np.ndarray) -> None:
        """Batch update for efficiency.

        Combines the batch's mean and population variance with the running
        totals in one step instead of looping over the values.

        Args:
            values: Array of values (non-finite values are ignored)
        """
        finite = self._finite_values(values)
        if finite.size == 0:
            return

        n = int(finite.size)
        batch_mean = float(finite.mean())
        batch_var = float(finite.var(ddof=0))

        if self.count == 0:
            self.count = n
            self.mean = batch_mean
            self.m2 = batch_var * n
        else:
            total = self.count + n
            delta = batch_mean - self.mean

            self.mean = (self.count * self.mean + n * batch_mean) / total
            # Within-batch spread plus the shift between the two means.
            self.m2 += batch_var * n + delta**2 * self.count * n / total
            self.count = total

        self._record_extremes_and_digest(finite)

    @property
    def variance(self) -> float:
        """Sample variance (0.0 with fewer than two values)."""
        if self.count < 2:
            return 0.0
        return self.m2 / (self.count - 1)

    @property
    def std(self) -> float:
        """Sample standard deviation."""
        return math.sqrt(self.variance)

    def quantile(self, q: float) -> float:
        """Get quantile value."""
        return self._digest.quantile(q)

    def cdf(self, x: float) -> float:
        """Get CDF value at x."""
        return self._digest.cdf(x)
526
+
527
+
528
class StreamingECDFMixin:
    """Mixin providing streaming ECDF and statistical test capabilities.

    This mixin enables memory-efficient statistical tests like KS test
    that traditionally require loading both datasets into memory.

    Example:
        class StreamingKSValidator(DriftValidator, StreamingECDFMixin):
            def __init__(self, ...):
                ...
                # Build reference ECDF once
                self._ref_ecdf = None

            def validate(self, lf):
                # Build reference ECDF if not cached
                if self._ref_ecdf is None:
                    self._ref_ecdf = self.build_streaming_ecdf(
                        self.reference_data, self.column
                    )

                # Compute KS statistic against current data
                ks_stat, p_value = self.streaming_ks_test(
                    self._ref_ecdf, lf, self.column
                )
    """

    def build_streaming_ecdf(
        self,
        lf: "pl.LazyFrame",
        column: str,
        compression: float = 200.0,
        chunk_size: int = 100000,
    ) -> "StreamingECDF":
        """Build ECDF from streaming data.

        The column is read chunk by chunk through ``DataChunker`` (with
        ``drop_nulls=True``) and each chunk is fed to the ECDF.

        Args:
            lf: Input LazyFrame
            column: Column to analyze
            compression: T-Digest compression factor
            chunk_size: Processing chunk size

        Returns:
            StreamingECDF instance
        """
        from truthound.validators.memory.base import DataChunker

        ecdf = StreamingECDF(compression=compression)

        chunker = DataChunker(
            chunk_size=chunk_size,
            columns=[column],
            drop_nulls=True,
        )

        for chunk_df in chunker.iterate(lf):
            ecdf.update(chunk_df.to_series().to_numpy())

        return ecdf

    def build_streaming_statistics(
        self,
        lf: "pl.LazyFrame",
        column: str,
        chunk_size: int = 100000,
    ) -> "StreamingStatistics":
        """Build streaming statistics from data.

        The column is read chunk by chunk through ``DataChunker`` (with
        ``drop_nulls=True``) and each chunk is merged via ``update_batch``.

        Args:
            lf: Input LazyFrame
            column: Column to analyze
            chunk_size: Processing chunk size

        Returns:
            StreamingStatistics instance
        """
        from truthound.validators.memory.base import DataChunker

        stats = StreamingStatistics()

        chunker = DataChunker(
            chunk_size=chunk_size,
            columns=[column],
            drop_nulls=True,
        )

        for chunk_df in chunker.iterate(lf):
            stats.update_batch(chunk_df.to_series().to_numpy())

        return stats

    @staticmethod
    def _max_cdf_deviation(ref_ecdf: "StreamingECDF", curr_ecdf: "StreamingECDF") -> float:
        """Return the largest |F_ref(x) - F_curr(x)| over a grid of probe points.

        The probes are the values found at 1000 evenly spaced quantiles
        (0.001 .. 0.999) of *both* distributions, so regions where either
        sample is dense are covered.
        """
        max_deviation = 0.0

        for q in np.linspace(0.001, 0.999, 1000):
            probes = (ref_ecdf.quantile(q), curr_ecdf.quantile(q))
            for x in probes:
                deviation = abs(ref_ecdf.cdf(x) - curr_ecdf.cdf(x))
                max_deviation = max(max_deviation, deviation)

        return max_deviation

    @staticmethod
    def _kolmogorov_p_value(ks_stat: float, n_eff: float) -> float:
        """Approximate P(D > ks_stat) from the Kolmogorov distribution.

        Uses the asymptotic series
        ``2 * sum_{k>=1} (-1)^(k+1) * exp(-2 * k^2 * lambda^2)`` with the
        small-sample correction
        ``lambda = (sqrt(n) + 0.12 + 0.11 / sqrt(n)) * D``.

        Args:
            ks_stat: Observed KS statistic.
            n_eff: Effective sample size (must be > 0).

        Returns:
            p-value clamped to [0, 1].
        """
        root_n = math.sqrt(n_eff)
        lambda_val = (root_n + 0.12 + 0.11 / root_n) * ks_stat

        p_value = 0.0
        for k in range(1, 100):
            term = 2 * ((-1) ** (k + 1)) * math.exp(-2 * k * k * lambda_val * lambda_val)
            p_value += term
            if abs(term) < 1e-10:
                break

        # The truncated alternating series can over/undershoot for small lambda.
        return max(0.0, min(1.0, p_value))

    def streaming_ks_statistic(
        self,
        ref_ecdf: "StreamingECDF",
        lf: "pl.LazyFrame",
        column: str,
        chunk_size: int = 100000,
    ) -> float:
        """Compute KS statistic against streaming current data.

        Builds an ECDF for the current data and measures the maximum
        deviation from the reference ECDF at a grid of quantile points.

        Args:
            ref_ecdf: Reference ECDF
            lf: Current data LazyFrame
            column: Column to compare
            chunk_size: Processing chunk size

        Returns:
            KS statistic (max |F_ref(x) - F_curr(x)|)
        """
        curr_ecdf = self.build_streaming_ecdf(lf, column, chunk_size=chunk_size)
        return self._max_cdf_deviation(ref_ecdf, curr_ecdf)

    def streaming_ks_test(
        self,
        ref_ecdf: "StreamingECDF",
        lf: "pl.LazyFrame",
        column: str,
        chunk_size: int = 100000,
    ) -> tuple[float, float]:
        """Perform streaming two-sample KS test.

        The current ECDF is built here directly (one pass over ``lf``)
        rather than via ``streaming_ks_statistic``, because its ``count``
        is needed for the p-value. Taking the sample size from the ECDF
        counts the same values that went into it and avoids a second scan
        of ``lf``.

        Args:
            ref_ecdf: Reference ECDF
            lf: Current data LazyFrame
            column: Column to compare
            chunk_size: Processing chunk size

        Returns:
            Tuple of (ks_statistic, approximate_p_value). The p-value is
            1.0 when either sample is empty.
        """
        curr_ecdf = self.build_streaming_ecdf(lf, column, chunk_size=chunk_size)
        ks_stat = self._max_cdf_deviation(ref_ecdf, curr_ecdf)

        n_ref = ref_ecdf.count
        n_curr = curr_ecdf.count
        if n_ref == 0 or n_curr == 0:
            return ks_stat, 1.0

        # Two-sample KS: sqrt(n*m / (n+m)) * D follows the Kolmogorov
        # distribution asymptotically, so n*m/(n+m) is the effective size.
        n_eff = n_ref * n_curr / (n_ref + n_curr)

        return ks_stat, self._kolmogorov_p_value(ks_stat, n_eff)

    def streaming_wasserstein(
        self,
        ref_stats: "StreamingStatistics",
        lf: "pl.LazyFrame",
        column: str,
        chunk_size: int = 100000,
    ) -> float:
        """Compute approximate Wasserstein distance using streaming statistics.

        Uses the quantile function approach for memory efficiency: the
        distance is the mean absolute difference between the two quantile
        functions sampled at 100 evenly spaced levels in [0.01, 0.99].

        Args:
            ref_stats: Reference statistics
            lf: Current data LazyFrame
            column: Column to compare
            chunk_size: Processing chunk size

        Returns:
            Approximate Wasserstein distance
        """
        curr_stats = self.build_streaming_statistics(lf, column, chunk_size)

        n_points = 100
        total_diff = 0.0
        for q in np.linspace(0.01, 0.99, n_points):
            total_diff += abs(ref_stats.quantile(q) - curr_stats.quantile(q))

        return total_diff / n_points
747
+
748
+
749
+ # Import polars for type hints
750
+ try:
751
+ import polars as pl
752
+ except ImportError:
753
+ pl = None