truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877)
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,984 @@
1
+ """Streaming database store implementation with cursor-based iteration.
2
+
3
+ This module provides a streaming-capable database store that uses server-side
4
+ cursors for efficient handling of large validation results.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import json
10
+ from dataclasses import dataclass, field
11
+ from datetime import datetime
12
+ from typing import TYPE_CHECKING, Any, AsyncIterator, Iterator
13
+ from uuid import uuid4
14
+
15
+ from truthound.stores.streaming.base import (
16
+ ChunkInfo,
17
+ CompressionType,
18
+ StreamingConfig,
19
+ StreamingFormat,
20
+ StreamingMetrics,
21
+ StreamingValidationStore,
22
+ StreamSession,
23
+ StreamStatus,
24
+ )
25
+ from truthound.stores.streaming.reader import (
26
+ AsyncStreamReader,
27
+ BaseStreamReader,
28
+ get_decompressor,
29
+ get_deserializer,
30
+ )
31
+ from truthound.stores.streaming.writer import (
32
+ AsyncStreamWriter,
33
+ BaseStreamWriter,
34
+ get_compressor,
35
+ get_serializer,
36
+ )
37
+
38
+ if TYPE_CHECKING:
39
+ from truthound.stores.results import ValidationResult, ValidatorResult
40
+
41
+
42
+ # =============================================================================
43
+ # Configuration
44
+ # =============================================================================
45
+
46
+
47
@dataclass
class StreamingDatabaseConfig(StreamingConfig):
    """Configuration for the streaming database store.

    Attributes:
        connection_url: SQLAlchemy connection URL.
        table_prefix: Prefix for table names.
        pool_size: Connection pool size.
        max_overflow: Maximum pool overflow connections.
        use_server_cursor: Use server-side cursors for reads.
        cursor_fetch_size: Number of rows to fetch per cursor iteration.
        batch_insert_size: Number of rows per batch insert.
    """

    connection_url: str = ""
    table_prefix: str = "truthound_streaming_"
    pool_size: int = 5
    max_overflow: int = 10
    use_server_cursor: bool = True
    cursor_fetch_size: int = 1000
    batch_insert_size: int = 1000

    def validate(self) -> None:
        """Validate configuration.

        Runs the parent validation first, then the database-specific checks.

        Raises:
            ValueError: If ``connection_url`` is empty, or if
                ``cursor_fetch_size`` or ``batch_insert_size`` is not a
                positive integer.
        """
        super().validate()
        if not self.connection_url:
            raise ValueError("Database connection URL is required")
        # The reader hands this value straight to fetchmany(); a non-positive
        # size is not a meaningful batch request.
        if self.cursor_fetch_size <= 0:
            raise ValueError("cursor_fetch_size must be a positive integer")
        # The writer uses this value as a range() step: zero raises at write
        # time and a negative step yields no batches, silently writing nothing.
        if self.batch_insert_size <= 0:
            raise ValueError("batch_insert_size must be a positive integer")
74
+
75
+
76
+ # =============================================================================
77
+ # Database Streaming Writer
78
+ # =============================================================================
79
+
80
+
81
class DatabaseStreamWriter(BaseStreamWriter):
    """Database streaming writer with batch inserts.

    Each chunk handed to the writer is unpacked back into individual records,
    which are inserted into the results table in batches of
    ``batch_insert_size`` rows.
    """

    def __init__(
        self,
        session: StreamSession,
        config: StreamingDatabaseConfig,
        engine: Any,
        session_maker: Any,
        results_table: Any,
    ):
        """Initialize the database writer.

        Args:
            session: The streaming session.
            config: Database streaming configuration.
            engine: SQLAlchemy engine.
            session_maker: SQLAlchemy session maker.
            results_table: Results table object.
        """
        super().__init__(session, config)
        self.db_config = config
        self._engine = engine
        self._session_maker = session_maker
        self._results_table = results_table
        self._db_session: Any = None
        self._pending_rows: list[dict[str, Any]] = []

    def _get_db_session(self) -> Any:
        """Return the writer's database session, creating it on first use."""
        if self._db_session is None:
            self._db_session = self._session_maker()
        return self._db_session

    def _write_chunk(self, chunk_info: ChunkInfo, data: bytes) -> None:
        """Write chunk data to the database as one row per record.

        The chunk is not stored as a blob: ``data`` is decompressed and
        deserialized, and each record becomes a row keyed by run, chunk and
        record index. All batches of a chunk are committed together; any
        failure rolls the whole chunk back and re-raises.

        Args:
            chunk_info: Metadata of the chunk being written. Its ``path`` is
                set to a ``db://`` locator once the rows are committed.
            data: Compressed, serialized records of the chunk.
        """
        from datetime import timezone

        decompressor = get_decompressor(self.config.compression)
        deserializer = get_deserializer(self.config.format)

        raw_data = decompressor.decompress(data)
        records = list(deserializer.deserialize(raw_data))

        # datetime.utcnow() is deprecated; this yields the same naive UTC
        # value. It is computed once so every row of the chunk shares one
        # timestamp.
        created_at = datetime.now(timezone.utc).replace(tzinfo=None)
        batch_size = self.db_config.batch_insert_size

        db_session = self._get_db_session()
        try:
            for start in range(0, len(records), batch_size):
                rows = [
                    {
                        "run_id": self.session.run_id,
                        "chunk_id": chunk_info.chunk_id,
                        "chunk_index": chunk_info.chunk_index,
                        # Position of the record within the whole chunk.
                        "record_index": index,
                        "data_json": json.dumps(record, default=str),
                        "created_at": created_at,
                    }
                    for index, record in enumerate(
                        records[start : start + batch_size], start
                    )
                ]
                db_session.execute(self._results_table.insert(), rows)
            db_session.commit()
        except Exception:
            db_session.rollback()
            raise

        chunk_info.path = f"db://{self.session.run_id}/{chunk_info.chunk_id}"

    def _write_session_state(self) -> None:
        """Write session state to database.

        Intentionally a no-op here: the original comment states that session
        state is stored in a separate sessions table.
        """
        pass

    def _finalize(self) -> None:
        """Finalize the stream by closing the database session, if any."""
        if self._db_session is not None:
            try:
                self._db_session.close()
            finally:
                # Drop the reference even if close() fails so a later call
                # does not try to reuse a half-closed session.
                self._db_session = None
163
+
164
+
165
+ # =============================================================================
166
+ # Database Streaming Reader
167
+ # =============================================================================
168
+
169
+
170
class DatabaseStreamReader(BaseStreamReader):
    """Database streaming reader with server-side cursors.

    Records of one run are read back in ``(chunk_index, record_index)`` order
    through a single cursor, ``cursor_fetch_size`` rows at a time.
    """

    def __init__(
        self,
        run_id: str,
        engine: Any,
        config: StreamingDatabaseConfig,
        results_table: Any,
    ):
        """Initialize the database reader.

        Args:
            run_id: The run ID to read.
            engine: SQLAlchemy engine.
            config: Database streaming configuration.
            results_table: Results table object.
        """
        self._run_id = run_id
        self._engine = engine
        self._db_config = config
        self._results_table = results_table
        self._connection: Any = None
        self._cursor: Any = None
        self._chunks: list[ChunkInfo] = []
        self._current_rows: list[Any] = []
        self._row_index = 0
        self._exhausted = False

        # Chunk metadata is loaded before the base initializer runs.
        self._load_chunks()

        super().__init__(config)

    def _load_chunks(self) -> None:
        """Load chunk information for the run from the database."""
        from sqlalchemy import text

        with self._engine.connect() as conn:
            result = conn.execute(
                text(
                    f"""
                    SELECT DISTINCT chunk_id, chunk_index, COUNT(*) as record_count
                    FROM {self._results_table.name}
                    WHERE run_id = :run_id
                    GROUP BY chunk_id, chunk_index
                    ORDER BY chunk_index
                    """
                ),
                {"run_id": self._run_id},
            )

            for row in result:
                # NOTE(review): offsets are per chunk (0..record_count), not
                # cumulative across the run, and byte_size is not tracked;
                # confirm this is what ChunkInfo consumers expect.
                self._chunks.append(
                    ChunkInfo(
                        chunk_id=row.chunk_id,
                        chunk_index=row.chunk_index,
                        record_count=row.record_count,
                        byte_size=0,
                        start_offset=0,
                        end_offset=row.record_count,
                        path=f"db://{self._run_id}/{row.chunk_id}",
                    )
                )

    def _get_chunks(self) -> list[ChunkInfo]:
        """Get list of chunks."""
        return self._chunks

    def _read_chunk(self, chunk_info: ChunkInfo) -> bytes:
        """Read a chunk from database and return it re-serialized.

        Note: For database reader, we use cursor-based reading instead.
        This method is not typically called.
        """
        from sqlalchemy import text

        with self._engine.connect() as conn:
            result = conn.execute(
                text(
                    f"""
                    SELECT data_json
                    FROM {self._results_table.name}
                    WHERE run_id = :run_id AND chunk_id = :chunk_id
                    ORDER BY record_index
                    """
                ),
                {"run_id": self._run_id, "chunk_id": chunk_info.chunk_id},
            )

            records = [json.loads(row.data_json) for row in result]
            serializer = get_serializer(self.config.format)
            return serializer.serialize_batch(records)

    def read(self) -> dict[str, Any] | None:
        """Read a single record using the cursor.

        Returns:
            The next decoded record, or ``None`` once the run is exhausted.
        """
        if self._exhausted:
            return None

        # Refill the local buffer when it is empty or fully consumed.
        if self._row_index >= len(self._current_rows):
            self._fetch_next_batch()
            if not self._current_rows:
                self._exhausted = True
                return None

        row = self._current_rows[self._row_index]
        self._row_index += 1

        return json.loads(row.data_json) if hasattr(row, "data_json") else row

    def _fetch_next_batch(self) -> None:
        """Fetch the next batch of rows, opening the cursor on first use."""
        from sqlalchemy import text

        if self._connection is None:
            self._connection = self._engine.connect()

        # Keyed on the cursor rather than the connection: if execute() failed
        # on an earlier call the connection is open but the cursor is still
        # None, and a retry must re-issue the query instead of calling
        # fetchmany() on None.
        if self._cursor is None:
            statement = text(
                f"""
                SELECT data_json
                FROM {self._results_table.name}
                WHERE run_id = :run_id
                ORDER BY chunk_index, record_index
                """
            )
            connection = self._connection
            if self._db_config.use_server_cursor:
                # stream_results asks the driver for a server-side cursor
                # where the backend supports one.
                connection = connection.execution_options(stream_results=True)
            self._cursor = connection.execute(statement, {"run_id": self._run_id})

        self._current_rows = list(
            self._cursor.fetchmany(self._db_config.cursor_fetch_size)
        )
        self._row_index = 0

        if not self._current_rows:
            self._exhausted = True

    def _release_db_resources(self) -> None:
        """Close the cursor and connection, clearing both references."""
        try:
            if self._cursor is not None:
                self._cursor.close()
        finally:
            self._cursor = None
            try:
                if self._connection is not None:
                    self._connection.close()
            finally:
                self._connection = None

    def close(self) -> None:
        """Close the reader and database connection."""
        try:
            super().close()
        finally:
            # Release the connection even if the base close() raises.
            self._release_db_resources()

    def reset(self) -> None:
        """Reset reader to beginning."""
        try:
            super().reset()
        finally:
            self._release_db_resources()
        self._current_rows = []
        self._row_index = 0
        self._exhausted = False
352
+
353
+
354
+ # =============================================================================
355
+ # Streaming Database Store
356
+ # =============================================================================
357
+
358
+
359
+ class StreamingDatabaseStore(StreamingValidationStore[StreamingDatabaseConfig]):
360
+ """Streaming database store with cursor-based iteration.
361
+
362
+ This store is optimized for handling large validation results in databases:
363
+
364
+ - Server-side cursors for memory-efficient reads
365
+ - Batch inserts for efficient writes
366
+ - Transaction management
367
+ - Connection pooling
368
+
369
+ Example:
370
+ >>> store = StreamingDatabaseStore(
371
+ ... connection_url="postgresql://user:pass@localhost/db",
372
+ ... )
373
+ >>>
374
+ >>> session = store.create_session("run_001", "large_dataset.csv")
375
+ >>> with store.create_writer(session) as writer:
376
+ ... for result in validation_results:
377
+ ... writer.write_result(result)
378
+ >>>
379
+ >>> # Efficiently iterate over results with cursor
380
+ >>> for result in store.iter_results("run_001"):
381
+ ... process(result)
382
+ """
383
+
384
+ def __init__(
385
+ self,
386
+ connection_url: str,
387
+ table_prefix: str = "truthound_streaming_",
388
+ pool_size: int = 5,
389
+ max_overflow: int = 10,
390
+ use_server_cursor: bool = True,
391
+ cursor_fetch_size: int = 1000,
392
+ **kwargs: Any,
393
+ ):
394
+ """Initialize the streaming database store.
395
+
396
+ Args:
397
+ connection_url: SQLAlchemy connection URL.
398
+ table_prefix: Prefix for table names.
399
+ pool_size: Connection pool size.
400
+ max_overflow: Maximum pool overflow connections.
401
+ use_server_cursor: Use server-side cursors for reads.
402
+ cursor_fetch_size: Rows per cursor fetch.
403
+ **kwargs: Additional configuration options.
404
+ """
405
+ config = StreamingDatabaseConfig(
406
+ connection_url=connection_url,
407
+ table_prefix=table_prefix,
408
+ pool_size=pool_size,
409
+ max_overflow=max_overflow,
410
+ use_server_cursor=use_server_cursor,
411
+ cursor_fetch_size=cursor_fetch_size,
412
+ **{k: v for k, v in kwargs.items() if hasattr(StreamingDatabaseConfig, k)},
413
+ )
414
+ super().__init__(config)
415
+
416
+ self._engine: Any = None
417
+ self._session_maker: Any = None
418
+ self._metadata: Any = None
419
+ self._results_table: Any = None
420
+ self._sessions_table: Any = None
421
+
422
+ @classmethod
423
+ def _default_config(cls) -> StreamingDatabaseConfig:
424
+ """Create default configuration."""
425
+ return StreamingDatabaseConfig()
426
+
427
+ def _do_initialize(self) -> None:
428
+ """Initialize database engine and create tables."""
429
+ try:
430
+ from sqlalchemy import (
431
+ Column,
432
+ DateTime,
433
+ Integer,
434
+ MetaData,
435
+ String,
436
+ Table,
437
+ Text,
438
+ create_engine,
439
+ )
440
+ from sqlalchemy.orm import sessionmaker
441
+ except ImportError:
442
+ raise ImportError("sqlalchemy library required for database streaming store")
443
+
444
+ # Create engine
445
+ self._engine = create_engine(
446
+ self._config.connection_url,
447
+ pool_size=self._config.pool_size,
448
+ max_overflow=self._config.max_overflow,
449
+ pool_pre_ping=True,
450
+ )
451
+
452
+ self._session_maker = sessionmaker(bind=self._engine)
453
+ self._metadata = MetaData()
454
+
455
+ # Define results table
456
+ self._results_table = Table(
457
+ f"{self._config.table_prefix}results",
458
+ self._metadata,
459
+ Column("id", Integer, primary_key=True, autoincrement=True),
460
+ Column("run_id", String(255), nullable=False, index=True),
461
+ Column("chunk_id", String(255), nullable=False, index=True),
462
+ Column("chunk_index", Integer, nullable=False),
463
+ Column("record_index", Integer, nullable=False),
464
+ Column("data_json", Text, nullable=False),
465
+ Column("created_at", DateTime, nullable=False),
466
+ )
467
+
468
+ # Define sessions table
469
+ self._sessions_table = Table(
470
+ f"{self._config.table_prefix}sessions",
471
+ self._metadata,
472
+ Column("id", Integer, primary_key=True, autoincrement=True),
473
+ Column("session_id", String(255), unique=True, nullable=False, index=True),
474
+ Column("run_id", String(255), nullable=False, index=True),
475
+ Column("data_asset", String(500), nullable=False),
476
+ Column("status", String(50), nullable=False),
477
+ Column("metadata_json", Text),
478
+ Column("metrics_json", Text),
479
+ Column("chunks_json", Text),
480
+ Column("started_at", DateTime, nullable=False),
481
+ Column("updated_at", DateTime, nullable=False),
482
+ Column("checkpoint_offset", Integer, default=0),
483
+ )
484
+
485
+ # Create tables
486
+ self._metadata.create_all(self._engine)
487
+
488
def close(self) -> None:
    """Shut the store down and release the database engine.

    The parent class's ``close()`` runs first. Afterwards the engine, if
    one is currently held, is disposed and the reference is cleared.
    """
    super().close()

    engine = self._engine
    if engine is None:
        # Nothing to release.
        return

    engine.dispose()
    self._engine = None
494
+
495
+ # -------------------------------------------------------------------------
496
+ # Session Management
497
+ # -------------------------------------------------------------------------
498
+
499
def create_session(
    self,
    run_id: str,
    data_asset: str,
    metadata: dict[str, Any] | None = None,
) -> StreamSession:
    """Create, persist and register a new streaming session.

    Args:
        run_id: Identifier of the run the session belongs to.
        data_asset: Name of the data asset being processed.
        metadata: Optional metadata; a falsy value is replaced by an
            empty dict.

    Returns:
        The new session, created with ``StreamStatus.PENDING``.
    """
    self.initialize()

    # Session id = run id + "_" + the first 8 hex digits of a random UUID.
    new_id = f"{run_id}_{uuid4().hex[:8]}"
    new_session = StreamSession(
        session_id=new_id,
        run_id=run_id,
        data_asset=data_asset,
        status=StreamStatus.PENDING,
        config=self._config,
        metadata=metadata or {},
    )

    # Persist first, then track the session as active in memory.
    self._save_session(new_session)
    self._active_sessions[new_id] = new_session
    return new_session
522
+
523
def get_session(self, session_id: str) -> StreamSession | None:
    """Return the session with the given id, or ``None`` if unknown.

    Sessions tracked in ``self._active_sessions`` are returned directly.
    Otherwise the sessions table is queried and a ``StreamSession`` is
    rebuilt from the stored row.

    Args:
        session_id: Identifier of the session to look up.

    Returns:
        The matching session, or ``None`` when no row exists.
    """
    try:
        return self._active_sessions[session_id]
    except KeyError:
        pass  # Not active in this process; fall back to the database.

    self.initialize()
    from sqlalchemy import text

    with self._engine.connect() as conn:
        lookup = text(
            f"""
            SELECT * FROM {self._sessions_table.name}
            WHERE session_id = :session_id
            """
        )
        row = conn.execute(lookup, {"session_id": session_id}).fetchone()

    if row is None:
        return None

    # Decode the stored columns in the same order as the constructor
    # arguments so that a malformed row fails on the same field.
    status = StreamStatus(row.status)
    metadata = json.loads(row.metadata_json) if row.metadata_json else {}
    if row.metrics_json:
        metrics = StreamingMetrics(**json.loads(row.metrics_json))
    else:
        metrics = StreamingMetrics()
    if row.chunks_json:
        chunks = [ChunkInfo.from_dict(c) for c in json.loads(row.chunks_json)]
    else:
        chunks = []

    return StreamSession(
        session_id=row.session_id,
        run_id=row.run_id,
        data_asset=row.data_asset,
        status=status,
        metadata=metadata,
        metrics=metrics,
        chunks=chunks,
        started_at=row.started_at,
        updated_at=row.updated_at,
        checkpoint_offset=row.checkpoint_offset,
    )
563
+
564
def resume_session(self, session_id: str) -> StreamSession:
    """Re-activate a previously interrupted session.

    The session is marked ``StreamStatus.ACTIVE``, its ``updated_at`` is
    refreshed, it is saved, and it is registered as active.

    Args:
        session_id: Identifier of the session to resume.

    Returns:
        The resumed session.

    Raises:
        ValueError: If the session does not exist or is already completed.
    """
    resumed = self.get_session(session_id)

    if resumed is None:
        raise ValueError(f"Session not found: {session_id}")
    if resumed.status == StreamStatus.COMPLETED:
        raise ValueError(f"Session already completed: {session_id}")

    resumed.status = StreamStatus.ACTIVE
    resumed.updated_at = datetime.now()
    self._save_session(resumed)

    self._active_sessions[session_id] = resumed
    return resumed
579
+
580
def _close_session(self, session: StreamSession) -> None:
    """Mark *session* as completed and persist that state.

    The session is dropped from the in-memory active map (if present),
    its status becomes ``StreamStatus.COMPLETED``, ``updated_at`` is
    refreshed, and the session is saved.
    """
    # pop() with a default is a no-op when the session is not tracked.
    self._active_sessions.pop(session.session_id, None)

    session.status = StreamStatus.COMPLETED
    session.updated_at = datetime.now()
    self._save_session(session)
587
+
588
def _save_session(self, session: StreamSession) -> None:
    """Write *session* to the sessions table, inserting or updating.

    A row with the same ``session_id`` is looked up first. If one exists
    its mutable columns are updated; otherwise a new row is inserted. Any
    exception rolls the transaction back and is re-raised. The database
    session is always closed.
    """
    from sqlalchemy import text

    db_session = self._session_maker()
    try:
        table_name = self._sessions_table.name

        lookup = db_session.execute(
            text(
                f"""
                SELECT id FROM {table_name}
                WHERE session_id = :session_id
                """
            ),
            {"session_id": session.session_id},
        )
        already_stored = lookup.fetchone()

        # Columns that change over a session's lifetime; both branches
        # below write exactly these values.
        mutable_columns = {
            "status": session.status.value,
            "metadata_json": json.dumps(session.metadata, default=str),
            "metrics_json": json.dumps(session.metrics.to_dict()),
            "chunks_json": json.dumps([c.to_dict() for c in session.chunks]),
            "updated_at": session.updated_at,
            "checkpoint_offset": session.checkpoint_offset,
        }

        if already_stored:
            db_session.execute(
                text(
                    f"""
                    UPDATE {table_name}
                    SET status = :status,
                        metadata_json = :metadata_json,
                        metrics_json = :metrics_json,
                        chunks_json = :chunks_json,
                        updated_at = :updated_at,
                        checkpoint_offset = :checkpoint_offset
                    WHERE session_id = :session_id
                    """
                ),
                {"session_id": session.session_id, **mutable_columns},
            )
        else:
            db_session.execute(
                self._sessions_table.insert().values(
                    session_id=session.session_id,
                    run_id=session.run_id,
                    data_asset=session.data_asset,
                    started_at=session.started_at,
                    **mutable_columns,
                )
            )

        db_session.commit()
    except Exception:
        db_session.rollback()
        raise
    finally:
        db_session.close()
655
+
656
+ # -------------------------------------------------------------------------
657
+ # Writer Operations
658
+ # -------------------------------------------------------------------------
659
+
660
def create_writer(self, session: StreamSession) -> DatabaseStreamWriter:
    """Build a ``DatabaseStreamWriter`` bound to *session*.

    The store is initialized first so that the engine, session maker and
    results table handed to the writer exist.
    """
    self.initialize()

    writer = DatabaseStreamWriter(
        session=session,
        config=self._config,
        engine=self._engine,
        session_maker=self._session_maker,
        results_table=self._results_table,
    )
    return writer
670
+
671
async def create_async_writer(self, session: StreamSession) -> AsyncStreamWriter:
    """Return an ``AsyncStreamWriter`` wrapping a writer for *session*.

    The underlying writer comes from ``create_writer``.
    """
    return AsyncStreamWriter(self.create_writer(session))
675
+
676
+ # -------------------------------------------------------------------------
677
+ # Reader Operations
678
+ # -------------------------------------------------------------------------
679
+
680
def create_reader(self, run_id: str) -> DatabaseStreamReader:
    """Build a ``DatabaseStreamReader`` for the results of *run_id*.

    The store is initialized first so that the engine and results table
    handed to the reader exist.
    """
    self.initialize()

    reader = DatabaseStreamReader(
        run_id=run_id,
        engine=self._engine,
        config=self._config,
        results_table=self._results_table,
    )
    return reader
689
+
690
async def create_async_reader(self, run_id: str) -> AsyncStreamReader:
    """Return an ``AsyncStreamReader`` wrapping a reader for *run_id*.

    The underlying reader comes from ``create_reader``.
    """
    return AsyncStreamReader(self.create_reader(run_id))
694
+
695
def iter_results(
    self,
    run_id: str,
    batch_size: int = 1000,
) -> Iterator["ValidatorResult"]:
    """Lazily yield every stored result for *run_id*.

    A reader is created for the run and used as a context manager for
    the duration of the iteration.

    Args:
        run_id: Run whose results should be yielded.
        batch_size: Accepted for interface compatibility; this method
            does not forward it to the reader.

    Yields:
        Each result produced by the reader's ``iter_results()``.
    """
    run_reader = self.create_reader(run_id)
    with run_reader:
        for item in run_reader.iter_results():
            yield item
704
+
705
async def aiter_results(
    self,
    run_id: str,
    batch_size: int = 1000,
) -> AsyncIterator["ValidatorResult"]:
    """Asynchronously yield every stored result for *run_id*.

    An async reader is created for the run and used as an async context
    manager for the duration of the iteration.

    Args:
        run_id: Run whose results should be yielded.
        batch_size: Accepted for interface compatibility; this method
            does not forward it to the reader.

    Yields:
        Each result produced by the reader's ``aiter_results()``.
    """
    async_reader = await self.create_async_reader(run_id)
    async with async_reader:
        async for item in async_reader.aiter_results():
            yield item
715
+
716
+ # -------------------------------------------------------------------------
717
+ # Chunk Management
718
+ # -------------------------------------------------------------------------
719
+
720
def list_chunks(self, run_id: str) -> list[ChunkInfo]:
    """Return the chunk list held by a freshly created reader for *run_id*.

    The value returned is the reader's ``_chunks`` attribute itself, not
    a copy.
    """
    self.initialize()
    return self.create_reader(run_id)._chunks
725
+
726
def get_chunk(self, chunk_info: ChunkInfo) -> list["ValidatorResult"]:
    """Load every record stored for one chunk.

    The run id is taken from ``chunk_info.path``: the ``db://`` marker is
    removed and the text before the first ``/`` is used.

    Args:
        chunk_info: Descriptor of the chunk; ``path`` and ``chunk_id``
            are read.

    Returns:
        The chunk's results, ordered by ``record_index``.
    """
    from truthound.stores.results import ValidatorResult
    from sqlalchemy import text

    # Every other query method calls initialize() before touching the
    # engine; do the same here so this method can be the first call made.
    self.initialize()

    run_id = chunk_info.path.replace("db://", "").split("/")[0]

    with self._engine.connect() as conn:
        cursor = conn.execute(
            text(
                f"""
                SELECT data_json
                FROM {self._results_table.name}
                WHERE run_id = :run_id AND chunk_id = :chunk_id
                ORDER BY record_index
                """
            ),
            {"run_id": run_id, "chunk_id": chunk_info.chunk_id},
        )
        # Fetch the rows while the connection is still open; the result
        # cannot be relied on once the ``with`` block has closed it.
        rows = cursor.fetchall()

    return [ValidatorResult.from_dict(json.loads(row.data_json)) for row in rows]
747
+
748
def delete_chunks(self, run_id: str) -> int:
    """Delete all stored results and session rows for *run_id*.

    Args:
        run_id: Run whose data should be removed.

    Returns:
        The number of distinct ``chunk_id`` values the run had in the
        results table before deletion (``0`` if there were none).
    """
    self.initialize()
    from sqlalchemy import text

    params = {"run_id": run_id}

    # engine.begin() commits when the block exits normally and rolls back
    # if it raises, so both deletes succeed or fail together. It also does
    # not rely on Connection.commit(), which only exists on 2.0-style
    # connections.
    with self._engine.begin() as conn:
        # Count first: the rows are gone after the DELETE below.
        count = conn.execute(
            text(
                f"""
                SELECT COUNT(DISTINCT chunk_id) as count
                FROM {self._results_table.name}
                WHERE run_id = :run_id
                """
            ),
            params,
        ).scalar() or 0

        # Delete results
        conn.execute(
            text(
                f"""
                DELETE FROM {self._results_table.name}
                WHERE run_id = :run_id
                """
            ),
            params,
        )

        # Delete session
        conn.execute(
            text(
                f"""
                DELETE FROM {self._sessions_table.name}
                WHERE run_id = :run_id
                """
            ),
            params,
        )

    return count
791
+
792
+ # -------------------------------------------------------------------------
793
+ # Validation Result Operations
794
+ # -------------------------------------------------------------------------
795
+
796
def stream_write_result(
    self,
    session: StreamSession,
    result: "ValidatorResult",
) -> None:
    """Append one validator result to an active session's stream.

    Args:
        session: Session to write to; it must be registered as active.
        result: The result to write.

    Raises:
        ValueError: If the session is not in the active-session map.
    """
    is_active = session.session_id in self._active_sessions
    if not is_active:
        raise ValueError(f"Session not active: {session.session_id}")

    self._get_or_create_writer(session).write_result(result)
807
+
808
def stream_write_batch(
    self,
    session: StreamSession,
    results: list["ValidatorResult"],
) -> None:
    """Append a batch of validator results to an active session's stream.

    Args:
        session: Session to write to; it must be registered as active.
        results: The results to write, passed to the writer as one batch.

    Raises:
        ValueError: If the session is not in the active-session map.
    """
    is_active = session.session_id in self._active_sessions
    if not is_active:
        raise ValueError(f"Session not active: {session.session_id}")

    self._get_or_create_writer(session).write_results(results)
819
+
820
+ def _get_or_create_writer(self, session: StreamSession) -> DatabaseStreamWriter:
821
+ """Get or create a writer for a session."""
822
+ writer_key = f"_writer_{session.session_id}"
823
+ if not hasattr(self, writer_key):
824
+ writer = self.create_writer(session)
825
+ setattr(self, writer_key, writer)
826
+ return getattr(self, writer_key)
827
+
828
def finalize_result(
    self,
    session: StreamSession,
    additional_metadata: dict[str, Any] | None = None,
) -> "ValidationResult":
    """Close out a streaming session and assemble its ``ValidationResult``.

    Any writer cached for the session is closed and discarded. All
    results stored for the session's run are then read back and counted,
    an overall status is derived, and the session is marked completed.

    Args:
        session: The session to finalize.
        additional_metadata: Optional entries merged over a copy of the
            session's own metadata.

    Returns:
        A ``ValidationResult`` holding every stored result, aggregate
        statistics, and the merged metadata plus a ``"streaming"`` entry.
    """
    from truthound.stores.results import (
        ResultStatistics,
        ResultStatus,
        ValidationResult,
    )

    # Close and discard the cached writer, if there is one, before the
    # results are read back.
    writer_attr = f"_writer_{session.session_id}"
    if hasattr(self, writer_attr):
        getattr(self, writer_attr).close()
        delattr(self, writer_attr)

    # Single pass over the stored results: collect and count.
    collected: list["ValidatorResult"] = []
    passed = 0
    failed = 0
    by_severity = {"critical": 0, "high": 0, "medium": 0, "low": 0}

    for item in self.iter_results(session.run_id):
        collected.append(item)
        if item.success:
            passed += 1
            continue
        failed += 1
        # NOTE(review): severity is tallied for failed results only, which
        # keeps the per-severity counts within total_issues. Confirm
        # against the original indentation.
        if item.severity and item.severity in by_severity:
            by_severity[item.severity] += 1

    # Critical failure -> FAILURE; any other failure -> WARNING.
    if by_severity["critical"] > 0:
        status = ResultStatus.FAILURE
    elif failed > 0:
        status = ResultStatus.WARNING
    else:
        status = ResultStatus.SUCCESS

    statistics = ResultStatistics(
        total_validators=len(collected),
        passed_validators=passed,
        failed_validators=failed,
        total_issues=failed,
        critical_issues=by_severity["critical"],
        high_issues=by_severity["high"],
        medium_issues=by_severity["medium"],
        low_issues=by_severity["low"],
    )

    # Work on a copy so the session's own metadata is left untouched.
    merged_metadata = session.metadata.copy()
    if additional_metadata:
        merged_metadata.update(additional_metadata)
    merged_metadata["streaming"] = {
        "storage": "database",
        "chunks": len(session.chunks),
        "total_records": session.metrics.records_written,
    }

    validation = ValidationResult(
        run_id=session.run_id,
        run_time=session.started_at,
        data_asset=session.data_asset,
        status=status,
        results=collected,
        statistics=statistics,
        metadata=merged_metadata,
    )

    self._close_session(session)
    return validation
904
+
905
def get_streaming_stats(self, run_id: str) -> dict[str, Any]:
    """Summarize what is stored for *run_id*.

    Args:
        run_id: Run to report on.

    Returns:
        A dict with ``run_id`` and ``record_count``. When a session row
        exists for the run, it also has ``data_asset``, ``status``,
        ``storage`` and ISO-formatted ``started_at`` / ``updated_at``
        (``None`` when the stored timestamp is empty). If several session
        rows share the run id, only the first one fetched is used.
    """
    self.initialize()
    from sqlalchemy import text

    params = {"run_id": run_id}

    with self._engine.connect() as conn:
        # Get record count
        count_query = text(
            f"""
            SELECT COUNT(*) as count
            FROM {self._results_table.name}
            WHERE run_id = :run_id
            """
        )
        record_count = conn.execute(count_query, params).scalar() or 0

        # Get session info
        session_query = text(
            f"""
            SELECT * FROM {self._sessions_table.name}
            WHERE run_id = :run_id
            """
        )
        session_row = conn.execute(session_query, params).fetchone()

    if session_row is None:
        return {"run_id": run_id, "record_count": record_count}

    started = session_row.started_at.isoformat() if session_row.started_at else None
    updated = session_row.updated_at.isoformat() if session_row.updated_at else None
    return {
        "run_id": run_id,
        "data_asset": session_row.data_asset,
        "status": session_row.status,
        "record_count": record_count,
        "storage": "database",
        "started_at": started,
        "updated_at": updated,
    }
948
+
949
+ # -------------------------------------------------------------------------
950
+ # Utility Methods
951
+ # -------------------------------------------------------------------------
952
+
953
def list_runs(self) -> list[str]:
    """Return the distinct run ids in the sessions table, sorted by run id."""
    self.initialize()
    from sqlalchemy import text

    query = text(
        f"""
        SELECT DISTINCT run_id FROM {self._sessions_table.name}
        ORDER BY run_id
        """
    )

    run_ids = []
    with self._engine.connect() as conn:
        # Consume the rows while the connection is still open.
        for row in conn.execute(query):
            run_ids.append(row.run_id)
    return run_ids
968
+
969
def get_record_count(self, run_id: str) -> int:
    """Return how many result rows are stored for *run_id*.

    A missing or falsy count from the database is reported as ``0``.
    """
    self.initialize()
    from sqlalchemy import text

    query = text(
        f"""
        SELECT COUNT(*) FROM {self._results_table.name}
        WHERE run_id = :run_id
        """
    )

    with self._engine.connect() as conn:
        total = conn.execute(query, {"run_id": run_id}).scalar()
    return total or 0