truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877) hide show
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,801 @@
1
+ """Base classes and protocols for streaming storage.
2
+
3
+ This module defines the abstract interfaces and protocols that all streaming
4
+ store implementations must follow. Streaming stores enable handling of
5
+ validation results that exceed available memory.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import asyncio
11
+ from abc import ABC, abstractmethod
12
+ from dataclasses import dataclass, field
13
+ from datetime import datetime
14
+ from enum import Enum
15
+ from typing import (
16
+ TYPE_CHECKING,
17
+ Any,
18
+ AsyncIterator,
19
+ Generic,
20
+ Iterator,
21
+ Protocol,
22
+ TypeVar,
23
+ runtime_checkable,
24
+ )
25
+
26
+ if TYPE_CHECKING:
27
+ from truthound.stores.results import ValidationResult, ValidatorResult
28
+
29
+
30
+ # =============================================================================
31
+ # Enums
32
+ # =============================================================================
33
+
34
+
35
class StreamingFormat(str, Enum):
    """On-disk formats a streaming store can emit.

    Members subclass ``str``, so each one compares equal to its raw value.
    """

    # JSON Lines: one JSON object per line.
    JSONL = "jsonl"
    # Newline Delimited JSON; the same layout as JSONL under another name.
    NDJSON = "ndjson"
    # CSV with a header row.
    CSV = "csv"
    # Columnar format intended for analytics.
    PARQUET = "parquet"
42
+
43
+
44
class CompressionType(str, Enum):
    """Compression algorithms selectable for streamed output.

    Members subclass ``str``, so each one compares equal to its raw value.
    """

    # No compression applied.
    NONE = "none"
    GZIP = "gzip"
    ZSTD = "zstd"
    LZ4 = "lz4"
    SNAPPY = "snappy"
52
+
53
+
54
class StreamStatus(str, Enum):
    """Lifecycle states a streaming operation can be in.

    Members subclass ``str``, so each one compares equal to its raw value.
    """

    PENDING = "pending"
    ACTIVE = "active"
    PAUSED = "paused"
    COMPLETED = "completed"
    FAILED = "failed"
    ABORTED = "aborted"
63
+
64
+
65
+ # =============================================================================
66
+ # Configuration
67
+ # =============================================================================
68
+
69
+
70
@dataclass
class StreamingConfig:
    """Configuration for streaming storage operations.

    Attributes:
        format: Output format (jsonl, csv, parquet).
        compression: Compression algorithm to use.
        chunk_size: Number of records per chunk/file.
        buffer_size: In-memory buffer size before flush.
        max_memory_mb: Maximum memory usage in MB.
        flush_interval_seconds: Auto-flush interval.
        enable_checkpoints: Enable periodic checkpoints for recovery.
        checkpoint_interval: Records between checkpoints.
        enable_metrics: Collect streaming metrics.
        max_retries: Maximum retry attempts on failure.
        retry_delay_seconds: Base delay between retries.
    """

    format: StreamingFormat = StreamingFormat.JSONL
    compression: CompressionType = CompressionType.NONE
    chunk_size: int = 10000
    buffer_size: int = 1000
    max_memory_mb: int = 512
    flush_interval_seconds: float = 30.0
    enable_checkpoints: bool = True
    checkpoint_interval: int = 10000
    enable_metrics: bool = True
    max_retries: int = 3
    retry_delay_seconds: float = 1.0

    def validate(self) -> None:
        """Check that every numeric setting is within its allowed range.

        Sizes and intervals counted in records or megabytes must be strictly
        positive. Time intervals and the retry settings may be zero but not
        negative.

        Raises:
            ValueError: On the first setting found out of range; the message
                names the offending field.
        """
        if self.chunk_size <= 0:
            raise ValueError("chunk_size must be positive")
        if self.buffer_size <= 0:
            raise ValueError("buffer_size must be positive")
        if self.max_memory_mb <= 0:
            raise ValueError("max_memory_mb must be positive")
        if self.flush_interval_seconds < 0:
            raise ValueError("flush_interval_seconds must be non-negative")
        if self.checkpoint_interval <= 0:
            raise ValueError("checkpoint_interval must be positive")
        # The retry settings were previously unchecked; a negative attempt
        # count or delay has no meaningful interpretation.
        if self.max_retries < 0:
            raise ValueError("max_retries must be non-negative")
        if self.retry_delay_seconds < 0:
            raise ValueError("retry_delay_seconds must be non-negative")
112
+
113
+
114
+ # =============================================================================
115
+ # Metrics and Monitoring
116
+ # =============================================================================
117
+
118
+
119
@dataclass
class StreamingMetrics:
    """Metrics collected during streaming operations.

    Attributes:
        records_written: Total records written.
        records_read: Total records read.
        bytes_written: Total bytes written (after compression).
        bytes_read: Total bytes read.
        chunks_written: Number of chunks/files written.
        chunks_read: Number of chunks/files read.
        flush_count: Number of buffer flushes.
        retry_count: Number of retry attempts.
        errors: List of errors encountered.
        start_time: When streaming started.
        end_time: When streaming ended.
        peak_memory_mb: Peak memory usage in MB.
        average_throughput: Records per second.
    """

    records_written: int = 0
    records_read: int = 0
    bytes_written: int = 0
    bytes_read: int = 0
    chunks_written: int = 0
    chunks_read: int = 0
    flush_count: int = 0
    retry_count: int = 0
    errors: list[str] = field(default_factory=list)
    start_time: datetime | None = None
    end_time: datetime | None = None
    peak_memory_mb: float = 0.0
    average_throughput: float = 0.0

    def record_write(self, count: int = 1, bytes_count: int = 0) -> None:
        """Add ``count`` records and ``bytes_count`` bytes to the write totals."""
        self.records_written += count
        self.bytes_written += bytes_count

    def record_read(self, count: int = 1, bytes_count: int = 0) -> None:
        """Add ``count`` records and ``bytes_count`` bytes to the read totals."""
        self.records_read += count
        self.bytes_read += bytes_count

    def record_chunk(self, is_write: bool = True) -> None:
        """Count one chunk as written (default) or, if not ``is_write``, as read."""
        if is_write:
            self.chunks_written += 1
        else:
            self.chunks_read += 1

    def record_error(self, error: str) -> None:
        """Append an error message to ``errors``."""
        self.errors.append(error)

    def start(self) -> None:
        """Set ``start_time`` to the current local time."""
        self.start_time = datetime.now()

    def finish(self) -> None:
        """Set ``end_time`` and derive ``average_throughput``.

        Throughput is (records written + records read) divided by the elapsed
        seconds. It is left untouched when ``start`` was never called or when
        the elapsed time is not positive.
        """
        self.end_time = datetime.now()
        if self.start_time is None:
            return
        duration = (self.end_time - self.start_time).total_seconds()
        if duration > 0:
            total_records = self.records_written + self.records_read
            self.average_throughput = total_records / duration

    def to_dict(self) -> dict[str, Any]:
        """Convert to a dictionary for serialization.

        Timestamps are rendered as ISO 8601 strings (or ``None``). ``errors``
        is copied so that mutating the returned dictionary cannot alter this
        object's own list.
        """
        return {
            "records_written": self.records_written,
            "records_read": self.records_read,
            "bytes_written": self.bytes_written,
            "bytes_read": self.bytes_read,
            "chunks_written": self.chunks_written,
            "chunks_read": self.chunks_read,
            "flush_count": self.flush_count,
            "retry_count": self.retry_count,
            "errors": list(self.errors),
            "start_time": self.start_time.isoformat() if self.start_time else None,
            "end_time": self.end_time.isoformat() if self.end_time else None,
            "peak_memory_mb": self.peak_memory_mb,
            "average_throughput": self.average_throughput,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "StreamingMetrics":
        """Create metrics from a dictionary shaped like :meth:`to_dict` output.

        Missing keys fall back to the field defaults. ISO 8601 strings under
        ``start_time``/``end_time`` are parsed back into ``datetime`` objects;
        passing the raw ``to_dict`` output straight to the constructor would
        leave them as strings and break ``finish`` and ``to_dict`` later on.

        Raises:
            ValueError: If a timestamp string is not valid ISO 8601.
        """
        timestamps: dict[str, Any] = {}
        for key in ("start_time", "end_time"):
            raw = data.get(key)
            timestamps[key] = (
                datetime.fromisoformat(raw) if isinstance(raw, str) else raw
            )
        return cls(
            records_written=data.get("records_written", 0),
            records_read=data.get("records_read", 0),
            bytes_written=data.get("bytes_written", 0),
            bytes_read=data.get("bytes_read", 0),
            chunks_written=data.get("chunks_written", 0),
            chunks_read=data.get("chunks_read", 0),
            flush_count=data.get("flush_count", 0),
            retry_count=data.get("retry_count", 0),
            errors=list(data.get("errors", [])),
            peak_memory_mb=data.get("peak_memory_mb", 0.0),
            average_throughput=data.get("average_throughput", 0.0),
            **timestamps,
        )
204
+
205
+
206
+ # =============================================================================
207
+ # Chunk Management
208
+ # =============================================================================
209
+
210
+
211
@dataclass
class ChunkInfo:
    """Descriptor of one stored chunk.

    Attributes:
        chunk_id: Unique identifier for the chunk.
        chunk_index: Sequential index of the chunk.
        record_count: Number of records in the chunk.
        byte_size: Size of the chunk in bytes.
        start_offset: Starting record offset.
        end_offset: Ending record offset.
        checksum: Optional checksum for integrity.
        created_at: When the chunk was created.
        path: Storage path/key for the chunk.
    """

    chunk_id: str
    chunk_index: int
    record_count: int
    byte_size: int
    start_offset: int
    end_offset: int
    checksum: str | None = None
    created_at: datetime = field(default_factory=datetime.now)
    path: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return the chunk as a plain dictionary.

        ``created_at`` is rendered as an ISO 8601 string; every other field
        is copied through unchanged.
        """
        verbatim = (
            "chunk_id",
            "chunk_index",
            "record_count",
            "byte_size",
            "start_offset",
            "end_offset",
            "checksum",
        )
        payload: dict[str, Any] = {name: getattr(self, name) for name in verbatim}
        payload["created_at"] = self.created_at.isoformat()
        payload["path"] = self.path
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ChunkInfo":
        """Build a chunk descriptor from a dictionary.

        ``checksum`` and ``path`` are optional; all other keys, including
        ``created_at`` (an ISO 8601 string), must be present.

        Raises:
            KeyError: If a required key is missing.
        """
        mandatory = (
            "chunk_id",
            "chunk_index",
            "record_count",
            "byte_size",
            "start_offset",
            "end_offset",
        )
        kwargs: dict[str, Any] = {key: data[key] for key in mandatory}
        kwargs["checksum"] = data.get("checksum")
        kwargs["created_at"] = datetime.fromisoformat(data["created_at"])
        kwargs["path"] = data.get("path", "")
        return cls(**kwargs)
265
+
266
+
267
@dataclass
class StreamSession:
    """Session information for a streaming operation.

    Attributes:
        session_id: Unique identifier for the session.
        run_id: Associated validation run ID.
        data_asset: Name of the data asset being validated.
        status: Current status of the stream.
        config: Streaming configuration.
        metrics: Collected metrics.
        chunks: List of written chunks.
        metadata: Additional session metadata.
        started_at: When the session started.
        updated_at: Last update time.
        checkpoint_offset: Last checkpointed offset.
    """

    session_id: str
    run_id: str
    data_asset: str
    status: StreamStatus = StreamStatus.PENDING
    config: StreamingConfig = field(default_factory=StreamingConfig)
    metrics: StreamingMetrics = field(default_factory=StreamingMetrics)
    chunks: list[ChunkInfo] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)
    started_at: datetime = field(default_factory=datetime.now)
    updated_at: datetime = field(default_factory=datetime.now)
    checkpoint_offset: int = 0

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary.

        ``status`` is stored by value, timestamps as ISO 8601 strings, and
        ``metrics``/``chunks`` through their own ``to_dict``. ``config`` is
        not included in the output.
        """
        return {
            "session_id": self.session_id,
            "run_id": self.run_id,
            "data_asset": self.data_asset,
            "status": self.status.value,
            "metrics": self.metrics.to_dict(),
            "chunks": [c.to_dict() for c in self.chunks],
            "metadata": self.metadata,
            "started_at": self.started_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
            "checkpoint_offset": self.checkpoint_offset,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "StreamSession":
        """Create from dictionary.

        ``session_id``, ``run_id``, ``data_asset``, ``started_at`` and
        ``updated_at`` are required; everything else falls back to a default.
        ``config`` is never read from ``data``, so the result always carries a
        default ``StreamingConfig``.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If ``status`` or a timestamp string is invalid.
        """
        # ``to_dict`` serialises the metric timestamps as ISO strings. They
        # must be parsed back before reaching the StreamingMetrics
        # constructor, otherwise the restored metrics hold ``str`` values and
        # a later ``to_dict()`` fails on ``.isoformat()``.
        metrics_data = dict(data.get("metrics", {}))
        for key in ("start_time", "end_time"):
            raw = metrics_data.get(key)
            if isinstance(raw, str):
                metrics_data[key] = datetime.fromisoformat(raw)

        return cls(
            session_id=data["session_id"],
            run_id=data["run_id"],
            data_asset=data["data_asset"],
            status=StreamStatus(data.get("status", "pending")),
            metrics=StreamingMetrics(**metrics_data),
            chunks=[ChunkInfo.from_dict(c) for c in data.get("chunks", [])],
            metadata=data.get("metadata", {}),
            started_at=datetime.fromisoformat(data["started_at"]),
            updated_at=datetime.fromisoformat(data["updated_at"]),
            checkpoint_offset=data.get("checkpoint_offset", 0),
        )
327
+
328
+
329
+ # =============================================================================
330
+ # Protocols
331
+ # =============================================================================
332
+
333
+
334
@runtime_checkable
class StreamingWriter(Protocol):
    """Structural interface for synchronous streaming writers.

    Being ``runtime_checkable``, ``isinstance`` checks verify only that the
    four methods exist, not their signatures.
    """

    def write(self, record: dict[str, Any]) -> None:
        """Write one record."""

    def write_batch(self, records: list[dict[str, Any]]) -> None:
        """Write several records in one call."""

    def flush(self) -> None:
        """Push buffered records out to storage."""

    def close(self) -> None:
        """Finalize and close the writer."""
353
+
354
+
355
@runtime_checkable
class StreamingReader(Protocol):
    """Structural interface for synchronous streaming readers.

    Being ``runtime_checkable``, ``isinstance`` checks verify only that the
    four methods exist, not their signatures.
    """

    def read(self) -> dict[str, Any] | None:
        """Read one record; the annotated return type also allows ``None``."""

    def read_batch(self, size: int) -> list[dict[str, Any]]:
        """Read a batch of records as a list."""

    def __iter__(self) -> Iterator[dict[str, Any]]:
        """Iterate over the records."""

    def close(self) -> None:
        """Close the reader."""
374
+
375
+
376
@runtime_checkable
class AsyncStreamingWriter(Protocol):
    """Structural interface for asynchronous streaming writers.

    Every member is a coroutine function. ``isinstance`` checks verify only
    that the four methods exist, not that they are awaitable.
    """

    async def write(self, record: dict[str, Any]) -> None:
        """Asynchronously write one record."""

    async def write_batch(self, records: list[dict[str, Any]]) -> None:
        """Asynchronously write several records in one call."""

    async def flush(self) -> None:
        """Push buffered records out to storage."""

    async def close(self) -> None:
        """Finalize and close the writer."""
395
+
396
+
397
@runtime_checkable
class AsyncStreamingReader(Protocol):
    """Structural interface for asynchronous streaming readers.

    ``read``, ``read_batch`` and ``close`` are coroutine functions, while
    ``__aiter__`` is a plain method returning an async iterator.
    """

    async def read(self) -> dict[str, Any] | None:
        """Asynchronously read one record; the return type also allows ``None``."""

    async def read_batch(self, size: int) -> list[dict[str, Any]]:
        """Asynchronously read a batch of records as a list."""

    def __aiter__(self) -> AsyncIterator[dict[str, Any]]:
        """Return an async iterator over the records."""

    async def close(self) -> None:
        """Close the reader."""
416
+
417
+
418
+ # =============================================================================
419
+ # Abstract Base Classes
420
+ # =============================================================================
421
+
422
+
423
# Type of the objects a streaming store holds.
T = TypeVar("T")
# Type of a store's configuration; bounded by StreamingConfig, so only
# StreamingConfig or one of its subclasses is accepted.
ConfigT = TypeVar("ConfigT", bound=StreamingConfig)
425
+
426
+
427
+ class StreamingStore(ABC, Generic[T, ConfigT]):
428
+ """Abstract base class for streaming stores.
429
+
430
+ Streaming stores handle large-scale data that cannot fit in memory.
431
+ They support incremental writing and reading through chunked operations.
432
+
433
+ Type Parameters:
434
+ T: The type of objects being stored.
435
+ ConfigT: The configuration type for this store.
436
+ """
437
+
438
+ def __init__(self, config: ConfigT | None = None) -> None:
439
+ """Initialize the streaming store.
440
+
441
+ Args:
442
+ config: Streaming configuration.
443
+ """
444
+ self._config = config or self._default_config()
445
+ self._config.validate()
446
+ self._initialized = False
447
+ self._active_sessions: dict[str, StreamSession] = {}
448
+
449
+ @classmethod
450
+ @abstractmethod
451
+ def _default_config(cls) -> ConfigT:
452
+ """Create default configuration."""
453
+ pass
454
+
455
@property
def config(self) -> ConfigT:
    """The configuration this store was constructed with (read-only)."""
    return self._config
459
+
460
+ # -------------------------------------------------------------------------
461
+ # Lifecycle
462
+ # -------------------------------------------------------------------------
463
+
464
def initialize(self) -> None:
    """Run one-time setup; calls after the first successful one do nothing.

    The initialized flag is set only after ``_do_initialize`` returns, so a
    failed attempt can be retried.
    """
    if self._initialized:
        return
    self._do_initialize()
    self._initialized = True
469
+
470
+ @abstractmethod
471
+ def _do_initialize(self) -> None:
472
+ """Perform actual initialization."""
473
+ pass
474
+
475
+ def close(self) -> None:
476
+ """Close the store and all active sessions."""
477
+ for session in list(self._active_sessions.values()):
478
+ self._close_session(session)
479
+ self._active_sessions.clear()
480
+
481
+ def __enter__(self) -> "StreamingStore[T, ConfigT]":
482
+ """Context manager entry."""
483
+ self.initialize()
484
+ return self
485
+
486
+ def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
487
+ """Context manager exit."""
488
+ self.close()
489
+
490
+ # -------------------------------------------------------------------------
491
+ # Session Management
492
+ # -------------------------------------------------------------------------
493
+
494
+ @abstractmethod
495
+ def create_session(
496
+ self,
497
+ run_id: str,
498
+ data_asset: str,
499
+ metadata: dict[str, Any] | None = None,
500
+ ) -> StreamSession:
501
+ """Create a new streaming session.
502
+
503
+ Args:
504
+ run_id: Validation run identifier.
505
+ data_asset: Name of the data asset.
506
+ metadata: Optional session metadata.
507
+
508
+ Returns:
509
+ A new streaming session.
510
+ """
511
+ pass
512
+
513
+ @abstractmethod
514
+ def get_session(self, session_id: str) -> StreamSession | None:
515
+ """Get an existing session.
516
+
517
+ Args:
518
+ session_id: Session identifier.
519
+
520
+ Returns:
521
+ The session if found, None otherwise.
522
+ """
523
+ pass
524
+
525
+ @abstractmethod
526
+ def resume_session(self, session_id: str) -> StreamSession:
527
+ """Resume an interrupted session.
528
+
529
+ Args:
530
+ session_id: Session identifier.
531
+
532
+ Returns:
533
+ The resumed session.
534
+
535
+ Raises:
536
+ ValueError: If session cannot be resumed.
537
+ """
538
+ pass
539
+
540
+ @abstractmethod
541
+ def _close_session(self, session: StreamSession) -> None:
542
+ """Close and finalize a session."""
543
+ pass
544
+
545
+ # -------------------------------------------------------------------------
546
+ # Streaming Write Operations
547
+ # -------------------------------------------------------------------------
548
+
549
+ @abstractmethod
550
+ def create_writer(self, session: StreamSession) -> StreamingWriter:
551
+ """Create a writer for the session.
552
+
553
+ Args:
554
+ session: The streaming session.
555
+
556
+ Returns:
557
+ A streaming writer instance.
558
+ """
559
+ pass
560
+
561
+ @abstractmethod
562
+ async def create_async_writer(
563
+ self, session: StreamSession
564
+ ) -> AsyncStreamingWriter:
565
+ """Create an async writer for the session.
566
+
567
+ Args:
568
+ session: The streaming session.
569
+
570
+ Returns:
571
+ An async streaming writer instance.
572
+ """
573
+ pass
574
+
575
+ # -------------------------------------------------------------------------
576
+ # Streaming Read Operations
577
+ # -------------------------------------------------------------------------
578
+
579
+ @abstractmethod
580
+ def create_reader(self, run_id: str) -> StreamingReader:
581
+ """Create a reader for a run's results.
582
+
583
+ Args:
584
+ run_id: The run ID to read.
585
+
586
+ Returns:
587
+ A streaming reader instance.
588
+ """
589
+ pass
590
+
591
+ @abstractmethod
592
+ async def create_async_reader(self, run_id: str) -> AsyncStreamingReader:
593
+ """Create an async reader for a run's results.
594
+
595
+ Args:
596
+ run_id: The run ID to read.
597
+
598
+ Returns:
599
+ An async streaming reader instance.
600
+ """
601
+ pass
602
+
603
+ @abstractmethod
604
+ def iter_results(
605
+ self,
606
+ run_id: str,
607
+ batch_size: int = 1000,
608
+ ) -> Iterator[T]:
609
+ """Iterate over results for a run.
610
+
611
+ Args:
612
+ run_id: The run ID to iterate.
613
+ batch_size: Number of records per batch.
614
+
615
+ Yields:
616
+ Individual result records.
617
+ """
618
+ pass
619
+
620
+ @abstractmethod
621
+ async def aiter_results(
622
+ self,
623
+ run_id: str,
624
+ batch_size: int = 1000,
625
+ ) -> AsyncIterator[T]:
626
+ """Async iterate over results for a run.
627
+
628
+ Args:
629
+ run_id: The run ID to iterate.
630
+ batch_size: Number of records per batch.
631
+
632
+ Yields:
633
+ Individual result records.
634
+ """
635
+ pass
636
+
637
+ # -------------------------------------------------------------------------
638
+ # Chunk Management
639
+ # -------------------------------------------------------------------------
640
+
641
+ @abstractmethod
642
+ def list_chunks(self, run_id: str) -> list[ChunkInfo]:
643
+ """List all chunks for a run.
644
+
645
+ Args:
646
+ run_id: The run ID.
647
+
648
+ Returns:
649
+ List of chunk information.
650
+ """
651
+ pass
652
+
653
+ @abstractmethod
654
+ def get_chunk(self, chunk_info: ChunkInfo) -> list[T]:
655
+ """Get records from a specific chunk.
656
+
657
+ Args:
658
+ chunk_info: The chunk to retrieve.
659
+
660
+ Returns:
661
+ Records from the chunk.
662
+ """
663
+ pass
664
+
665
+ @abstractmethod
666
+ def delete_chunks(self, run_id: str) -> int:
667
+ """Delete all chunks for a run.
668
+
669
+ Args:
670
+ run_id: The run ID.
671
+
672
+ Returns:
673
+ Number of chunks deleted.
674
+ """
675
+ pass
676
+
677
+
678
+ class StreamingValidationStore(StreamingStore["ValidatorResult", ConfigT], Generic[ConfigT]):
679
+ """Streaming store specialized for validation results.
680
+
681
+ Provides additional methods specific to validation result streaming,
682
+ including aggregation and statistics computation.
683
+ """
684
+
685
@abstractmethod
def stream_write_result(
    self,
    session: StreamSession,
    result: "ValidatorResult",
) -> None:
    """Append one validator result to the stream of a session.

    Args:
        session: Streaming session that receives the result.
        result: Validator result to be written.
    """
698
+
699
@abstractmethod
def stream_write_batch(
    self,
    session: StreamSession,
    results: list["ValidatorResult"],
) -> None:
    """Append several validator results to the stream of a session.

    Args:
        session: Streaming session that receives the results.
        results: Validator results to be written.
    """
712
+
713
@abstractmethod
def finalize_result(
    self,
    session: StreamSession,
    additional_metadata: dict[str, Any] | None = None,
) -> "ValidationResult":
    """Finish a streaming session and build its ValidationResult.

    All results streamed during the session are aggregated into one
    ValidationResult that carries the computed statistics.

    Args:
        session: Streaming session to finalize.
        additional_metadata: Extra metadata, if any.

    Returns:
        The complete ValidationResult.
    """
732
+
733
@abstractmethod
def get_streaming_stats(self, run_id: str) -> dict[str, Any]:
    """Return statistics describing a streaming run.

    Args:
        run_id: Identifier of the run.

    Returns:
        A dictionary of statistics, including record counts, errors and
        timing.
    """
744
+
745
def iter_failed_results(
    self,
    run_id: str,
    batch_size: int = 1000,
) -> Iterator["ValidatorResult"]:
    """Lazily yield only the unsuccessful results of a run.

    Results are pulled from ``iter_results`` and kept when their
    ``success`` attribute is falsy.

    Args:
        run_id: The run ID.
        batch_size: Number of records per batch, forwarded to
            ``iter_results``.

    Yields:
        Failed validator results.
    """
    yield from filter(
        lambda candidate: not candidate.success,
        self.iter_results(run_id, batch_size),
    )
762
+
763
def iter_results_by_column(
    self,
    run_id: str,
    column: str,
    batch_size: int = 1000,
) -> Iterator["ValidatorResult"]:
    """Lazily yield the results of a run that belong to one column.

    Results are pulled from ``iter_results`` and kept when their
    ``column`` attribute equals ``column``.

    Args:
        run_id: The run ID.
        column: Column name to filter by.
        batch_size: Number of records per batch, forwarded to
            ``iter_results``.

    Yields:
        Validator results for the specified column.
    """
    yield from (
        item
        for item in self.iter_results(run_id, batch_size)
        if item.column == column
    )
782
+
783
def iter_results_by_severity(
    self,
    run_id: str,
    severity: str,
    batch_size: int = 1000,
) -> Iterator["ValidatorResult"]:
    """Lazily yield the results of a run that have a given severity.

    Results are pulled from ``iter_results`` and kept when their
    ``severity`` attribute equals ``severity``.

    Args:
        run_id: The run ID.
        severity: Severity level to filter by.
        batch_size: Number of records per batch, forwarded to
            ``iter_results``.

    Yields:
        Validator results with the specified severity.
    """
    source = self.iter_results(run_id, batch_size)
    for entry in source:
        if not (entry.severity == severity):
            continue
        yield entry