truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877) hide show
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,790 @@
1
+ """Streaming writers for incremental result storage.
2
+
3
+ This module provides writers that can incrementally write validation results
4
+ to storage without holding all results in memory.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ import asyncio
10
+ import gzip
11
+ import hashlib
12
+ import json
13
+ import threading
14
+ import time
15
+ from abc import ABC, abstractmethod
16
+ from contextlib import contextmanager
17
+ from dataclasses import dataclass, field
18
+ from datetime import datetime
19
+ from io import BytesIO, StringIO
20
+ from pathlib import Path
21
+ from typing import TYPE_CHECKING, Any, BinaryIO, Callable, Iterator, TextIO
22
+ from uuid import uuid4
23
+
24
+ from truthound.stores.streaming.base import (
25
+ ChunkInfo,
26
+ CompressionType,
27
+ StreamingConfig,
28
+ StreamingFormat,
29
+ StreamingMetrics,
30
+ StreamSession,
31
+ StreamStatus,
32
+ )
33
+
34
+ if TYPE_CHECKING:
35
+ from truthound.stores.results import ValidatorResult
36
+
37
+
38
+ # =============================================================================
39
+ # Exceptions
40
+ # =============================================================================
41
+
42
+
43
class StreamWriteError(Exception):
    """Base error for failures during a streaming write operation."""
47
+
48
+
49
class StreamBufferOverflowError(StreamWriteError):
    """Signals that a buffer exceeded its maximum size."""
53
+
54
+
55
class StreamFlushError(StreamWriteError):
    """Signals that flushing the buffer to storage failed."""
59
+
60
+
61
+ # =============================================================================
62
+ # Serializers
63
+ # =============================================================================
64
+
65
+
66
class RecordSerializer(ABC):
    """Interface for encoding record dictionaries as bytes.

    Concrete serializers implement all three methods below; the base class
    cannot be instantiated directly.
    """

    @abstractmethod
    def serialize(self, record: dict[str, Any]) -> bytes:
        """Encode one record and return the resulting bytes."""

    @abstractmethod
    def serialize_batch(self, records: list[dict[str, Any]]) -> bytes:
        """Encode a list of records and return them as one bytes payload."""

    @abstractmethod
    def get_content_type(self) -> str:
        """Return the content type string for the produced format."""
83
+
84
+
85
class JSONLSerializer(RecordSerializer):
    """JSON Lines serializer: one JSON document per newline-terminated line.

    Values that ``json`` cannot encode natively are converted with ``str``
    (``default=str``).
    """

    def serialize(self, record: dict[str, Any]) -> bytes:
        """Serialize a single record to one UTF-8 encoded JSONL line.

        Args:
            record: The record to encode.

        Returns:
            The JSON document followed by a newline.
        """
        return (json.dumps(record, default=str) + "\n").encode("utf-8")

    def serialize_batch(self, records: list[dict[str, Any]]) -> bytes:
        """Serialize a batch of records to JSONL format.

        Args:
            records: The records to encode, in output order.

        Returns:
            One newline-terminated line per record; ``b""`` for an empty
            batch.
        """
        # Terminate each record individually so an empty batch yields no
        # bytes at all instead of a stray blank line.
        return "".join(
            json.dumps(record, default=str) + "\n" for record in records
        ).encode("utf-8")

    def get_content_type(self) -> str:
        """Return the content type for newline-delimited JSON."""
        return "application/x-ndjson"
99
+
100
+
101
class CSVSerializer(RecordSerializer):
    """CSV serializer.

    The header row is written once, in front of the first data row this
    instance produces. When no column list is supplied, the keys of the
    first record seen become the columns; keys outside the column list are
    ignored on output.
    """

    def __init__(self, columns: list[str] | None = None):
        self.columns = columns
        self._header_written = False

    def serialize(self, record: dict[str, Any]) -> bytes:
        """Serialize a single record to CSV format."""
        return self._render([record])

    def serialize_batch(self, records: list[dict[str, Any]]) -> bytes:
        """Serialize a batch of records to CSV format.

        An empty batch produces ``b""`` and leaves the header state untouched.
        """
        return self._render(records) if records else b""

    def _render(self, rows: list[dict[str, Any]]) -> bytes:
        """Write *rows* (non-empty) as CSV text and return it UTF-8 encoded."""
        import csv
        from io import StringIO

        if self.columns is None:
            # Column order is fixed by the first record ever serialized.
            self.columns = list(rows[0].keys())

        sink = StringIO()
        csv_writer = csv.DictWriter(
            sink, fieldnames=self.columns, extrasaction="ignore"
        )

        if not self._header_written:
            csv_writer.writeheader()
            self._header_written = True

        csv_writer.writerows(rows)
        return sink.getvalue().encode("utf-8")

    def get_content_type(self) -> str:
        """Return the content type for CSV data."""
        return "text/csv"
151
+
152
+
153
def get_serializer(format: StreamingFormat, **kwargs: Any) -> RecordSerializer:
    """Build the serializer matching a streaming format.

    Args:
        format: The requested streaming format.
        **kwargs: Optional settings; only ``columns`` is read, and only for
            the CSV format.

    Returns:
        A new serializer instance.

    Raises:
        ValueError: If the format is neither JSONL/NDJSON nor CSV.
    """
    wants_json_lines = (
        format == StreamingFormat.JSONL or format == StreamingFormat.NDJSON
    )
    if wants_json_lines:
        return JSONLSerializer()

    if format == StreamingFormat.CSV:
        return CSVSerializer(columns=kwargs.get("columns"))

    raise ValueError(f"Unsupported format: {format}")
161
+
162
+
163
+ # =============================================================================
164
+ # Compressors
165
+ # =============================================================================
166
+
167
+
168
class Compressor(ABC):
    """Interface for byte-level compressors.

    Concrete compressors implement both methods below; the base class cannot
    be instantiated directly.
    """

    @abstractmethod
    def compress(self, data: bytes) -> bytes:
        """Return the compressed form of *data*."""

    @abstractmethod
    def get_extension(self) -> str:
        """Return the file extension used for compressed files."""
180
+
181
+
182
class NoCompressor(Compressor):
    """Pass-through compressor that leaves data unchanged."""

    def compress(self, data: bytes) -> bytes:
        """Return *data* as-is."""
        return data

    def get_extension(self) -> str:
        """Return an empty extension, since nothing is compressed."""
        return ""
190
+
191
+
192
class GzipCompressor(Compressor):
    """Gzip compressor with a configurable compression level."""

    def __init__(self, level: int = 6):
        self.level = level

    def compress(self, data: bytes) -> bytes:
        """Gzip-compress *data* at the configured level."""
        compress_level = self.level
        return gzip.compress(data, compresslevel=compress_level)

    def get_extension(self) -> str:
        """Return the gzip file extension."""
        return ".gz"
203
+
204
+
205
class ZstdCompressor(Compressor):
    """Zstandard compression.

    The ``zstandard`` package is imported lazily on first use, so creating
    this object does not require the package to be installed.
    """

    def __init__(self, level: int = 3):
        self.level = level
        self._compressor = None  # created on first use by _get_compressor()

    def _get_compressor(self) -> Any:
        """Return the cached zstandard compressor, creating it if needed.

        Raises:
            ImportError: If the ``zstandard`` package is not installed.
        """
        if self._compressor is None:
            # Keep only the import inside the try so errors raised while
            # constructing the compressor are not caught here.
            try:
                import zstandard as zstd
            except ImportError as exc:
                raise ImportError(
                    "zstandard library required for zstd compression"
                ) from exc

            self._compressor = zstd.ZstdCompressor(level=self.level)
        return self._compressor

    def compress(self, data: bytes) -> bytes:
        """Compress *data* with Zstandard at the configured level."""
        return self._get_compressor().compress(data)

    def get_extension(self) -> str:
        """Return the Zstandard file extension."""
        return ".zst"
228
+
229
+
230
class LZ4Compressor(Compressor):
    """LZ4 frame compression.

    The ``lz4`` package is imported lazily inside ``compress``.
    """

    def compress(self, data: bytes) -> bytes:
        """Compress *data* using the LZ4 frame format.

        Raises:
            ImportError: If the ``lz4`` package is not installed.
        """
        # Only the import is guarded, so an ImportError raised during the
        # actual compression is not misreported as a missing library.
        try:
            import lz4.frame
        except ImportError as exc:
            raise ImportError("lz4 library required for lz4 compression") from exc

        return lz4.frame.compress(data)

    def get_extension(self) -> str:
        """Return the LZ4 file extension."""
        return ".lz4"
243
+
244
+
245
def get_compressor(compression: CompressionType, **kwargs: Any) -> Compressor:
    """Build the compressor matching a compression type.

    Args:
        compression: The requested compression type.
        **kwargs: Optional settings; only ``level`` is read, for gzip
            (default 6) and zstd (default 3).

    Returns:
        A new compressor instance.

    Raises:
        ValueError: If the compression type is not one of NONE, GZIP, ZSTD
            or LZ4.
    """
    if compression == CompressionType.NONE:
        return NoCompressor()

    if compression == CompressionType.GZIP:
        return GzipCompressor(level=kwargs.get("level", 6))

    if compression == CompressionType.ZSTD:
        return ZstdCompressor(level=kwargs.get("level", 3))

    if compression == CompressionType.LZ4:
        return LZ4Compressor()

    raise ValueError(f"Unsupported compression: {compression}")
257
+
258
+
259
+ # =============================================================================
260
+ # Buffer Management
261
+ # =============================================================================
262
+
263
+
264
@dataclass
class WriteBuffer:
    """In-memory staging area that batches records between flushes.

    The buffer never flushes by itself; ``add`` and ``add_batch`` only report
    whether one of the two limits has been reached.

    Attributes:
        max_records: Record count at which a flush is requested.
        max_bytes: Byte size at which a flush is requested.
        records: Records currently held.
        byte_size: Sum of the JSON-encoded sizes of the held records.
    """

    max_records: int = 1000
    max_bytes: int = 10 * 1024 * 1024  # 10MB
    records: list[dict[str, Any]] = field(default_factory=list)
    byte_size: int = 0

    def _append(self, record: dict[str, Any]) -> None:
        """Store one record and account for its size."""
        # Size is measured on the UTF-8 JSON encoding, before the record is
        # stored, so a record that cannot be encoded is never buffered.
        encoded_length = len(json.dumps(record, default=str).encode("utf-8"))
        self.records.append(record)
        self.byte_size += encoded_length

    def add(self, record: dict[str, Any]) -> bool:
        """Buffer a single record.

        Returns:
            True if buffer should be flushed.
        """
        self._append(record)
        return self.should_flush()

    def add_batch(self, records: list[dict[str, Any]]) -> bool:
        """Buffer every record of *records*, in order.

        Returns:
            True if buffer should be flushed.
        """
        for item in records:
            self._append(item)
        return self.should_flush()

    def should_flush(self) -> bool:
        """Report whether the record or byte limit has been reached."""
        below_record_limit = len(self.records) < self.max_records
        below_byte_limit = self.byte_size < self.max_bytes
        return not (below_record_limit and below_byte_limit)

    def clear(self) -> list[dict[str, Any]]:
        """Empty the buffer and hand back the records it held."""
        drained, self.records = self.records, []
        self.byte_size = 0
        return drained

    def is_empty(self) -> bool:
        """Report whether no records are buffered."""
        return not self.records
320
+
321
+
322
+ # =============================================================================
323
+ # Base Writer
324
+ # =============================================================================
325
+
326
+
327
+ class BaseStreamWriter(ABC):
328
+ """Base class for streaming writers.
329
+
330
+ Handles buffering, serialization, and compression.
331
+ """
332
+
333
+ def __init__(
334
+ self,
335
+ session: StreamSession,
336
+ config: StreamingConfig,
337
+ serializer: RecordSerializer | None = None,
338
+ compressor: Compressor | None = None,
339
+ ):
340
+ """Initialize the writer.
341
+
342
+ Args:
343
+ session: The streaming session.
344
+ config: Streaming configuration.
345
+ serializer: Record serializer (auto-selected if None).
346
+ compressor: Data compressor (auto-selected if None).
347
+ """
348
+ self.session = session
349
+ self.config = config
350
+ self.serializer = serializer or get_serializer(config.format)
351
+ self.compressor = compressor or get_compressor(config.compression)
352
+
353
+ self.buffer = WriteBuffer(
354
+ max_records=config.buffer_size,
355
+ max_bytes=config.max_memory_mb * 1024 * 1024 // 4, # 25% of max memory
356
+ )
357
+
358
+ self.metrics = session.metrics
359
+ self._chunk_index = len(session.chunks)
360
+ self._record_offset = sum(c.record_count for c in session.chunks)
361
+ self._closed = False
362
+ self._lock = threading.RLock()
363
+
364
+ # Auto-flush timer
365
+ self._last_flush_time = time.time()
366
+ self._flush_timer: threading.Timer | None = None
367
+
368
+ if config.flush_interval_seconds > 0:
369
+ self._start_flush_timer()
370
+
371
+ def _start_flush_timer(self) -> None:
372
+ """Start the auto-flush timer."""
373
+ if self._flush_timer is not None:
374
+ self._flush_timer.cancel()
375
+
376
+ self._flush_timer = threading.Timer(
377
+ self.config.flush_interval_seconds,
378
+ self._auto_flush,
379
+ )
380
+ self._flush_timer.daemon = True
381
+ self._flush_timer.start()
382
+
383
+ def _auto_flush(self) -> None:
384
+ """Auto-flush callback."""
385
+ if not self._closed and not self.buffer.is_empty():
386
+ try:
387
+ self.flush()
388
+ except Exception:
389
+ pass # Ignore auto-flush errors
390
+ if not self._closed:
391
+ self._start_flush_timer()
392
+
393
+ def write(self, record: dict[str, Any]) -> None:
394
+ """Write a single record.
395
+
396
+ Args:
397
+ record: The record to write.
398
+ """
399
+ if self._closed:
400
+ raise StreamWriteError("Writer is closed")
401
+
402
+ with self._lock:
403
+ if self.buffer.add(record):
404
+ self.flush()
405
+
406
+ def write_result(self, result: "ValidatorResult") -> None:
407
+ """Write a ValidatorResult.
408
+
409
+ Args:
410
+ result: The validator result to write.
411
+ """
412
+ self.write(result.to_dict())
413
+
414
+ def write_batch(self, records: list[dict[str, Any]]) -> None:
415
+ """Write a batch of records.
416
+
417
+ Args:
418
+ records: The records to write.
419
+ """
420
+ if self._closed:
421
+ raise StreamWriteError("Writer is closed")
422
+
423
+ with self._lock:
424
+ if self.buffer.add_batch(records):
425
+ self.flush()
426
+
427
+ def write_results(self, results: list["ValidatorResult"]) -> None:
428
+ """Write a batch of ValidatorResults.
429
+
430
+ Args:
431
+ results: The validator results to write.
432
+ """
433
+ self.write_batch([r.to_dict() for r in results])
434
+
435
+ def flush(self) -> ChunkInfo:
436
+ """Flush buffered records to storage.
437
+
438
+ Returns:
439
+ Information about the written chunk.
440
+ """
441
+ if self._closed:
442
+ raise StreamWriteError("Writer is closed")
443
+
444
+ with self._lock:
445
+ records = self.buffer.clear()
446
+ if not records:
447
+ return ChunkInfo(
448
+ chunk_id="",
449
+ chunk_index=-1,
450
+ record_count=0,
451
+ byte_size=0,
452
+ start_offset=self._record_offset,
453
+ end_offset=self._record_offset,
454
+ )
455
+
456
+ # Serialize
457
+ data = self.serializer.serialize_batch(records)
458
+
459
+ # Compute checksum before compression
460
+ checksum = hashlib.md5(data).hexdigest()
461
+
462
+ # Compress
463
+ compressed_data = self.compressor.compress(data)
464
+
465
+ # Create chunk info
466
+ chunk_id = f"{self.session.run_id}_chunk_{self._chunk_index:06d}"
467
+ chunk_info = ChunkInfo(
468
+ chunk_id=chunk_id,
469
+ chunk_index=self._chunk_index,
470
+ record_count=len(records),
471
+ byte_size=len(compressed_data),
472
+ start_offset=self._record_offset,
473
+ end_offset=self._record_offset + len(records),
474
+ checksum=checksum,
475
+ )
476
+
477
+ # Write to storage
478
+ try:
479
+ self._write_chunk(chunk_info, compressed_data)
480
+ except Exception as e:
481
+ # Retry logic
482
+ for attempt in range(self.config.max_retries):
483
+ try:
484
+ time.sleep(self.config.retry_delay_seconds * (2**attempt))
485
+ self._write_chunk(chunk_info, compressed_data)
486
+ self.metrics.retry_count += 1
487
+ break
488
+ except Exception:
489
+ if attempt == self.config.max_retries - 1:
490
+ self.metrics.record_error(str(e))
491
+ raise StreamFlushError(f"Failed to write chunk: {e}")
492
+
493
+ # Update state
494
+ self.session.chunks.append(chunk_info)
495
+ self._chunk_index += 1
496
+ self._record_offset += len(records)
497
+ self._last_flush_time = time.time()
498
+
499
+ # Update metrics
500
+ self.metrics.record_write(len(records), len(compressed_data))
501
+ self.metrics.record_chunk(is_write=True)
502
+ self.metrics.flush_count += 1
503
+
504
+ # Checkpoint if needed
505
+ if (
506
+ self.config.enable_checkpoints
507
+ and self._record_offset - self.session.checkpoint_offset
508
+ >= self.config.checkpoint_interval
509
+ ):
510
+ self._write_checkpoint()
511
+
512
+ return chunk_info
513
+
514
@abstractmethod
def _write_chunk(self, chunk_info: ChunkInfo, data: bytes) -> None:
    """Persist a single chunk to the backing store.

    Concrete writers implement this hook; the base class invokes it with
    the payload it has already compressed.

    Args:
        chunk_info: Metadata describing the chunk being written.
        data: Compressed chunk payload.
    """
523
+
524
def _write_checkpoint(self) -> None:
    """Record the current record offset as the recovery point and persist it."""
    session = self.session
    session.checkpoint_offset = self._record_offset
    session.updated_at = datetime.now()
    # Persisting is delegated to the backend-specific hook.
    self._write_session_state()
529
+
530
@abstractmethod
def _write_session_state(self) -> None:
    """Persist the session state so an interrupted stream can be recovered.

    Concrete writers implement this hook; the base class calls it when a
    checkpoint is taken and again when the writer is closed.
    """
534
+
535
def close(self) -> None:
    """Close the writer and finalize the stream.

    Cancels any pending flush timer, flushes records still held in the
    buffer, stamps the session, persists the final session state and runs
    the backend-specific ``_finalize`` hook. Calling ``close`` again after
    a successful close is a no-op.

    A session already marked ``StreamStatus.FAILED`` (which ``__exit__``
    does when the ``with`` body raised) keeps that status; any other
    session is marked ``StreamStatus.COMPLETED``.

    If flushing or persisting raises, the exception propagates and the
    writer stays open (``_closed`` remains ``False``), so ``close`` may be
    called again.
    """
    # Fast path: skip the lock entirely once the writer is closed.
    if self._closed:
        return

    with self._lock:
        # Re-check under the lock: another thread may have completed the
        # close while this one was waiting, and finalizing twice would
        # rewrite the session state and finish the metrics a second time.
        if self._closed:
            return

        # Stop flush timer
        if self._flush_timer is not None:
            self._flush_timer.cancel()
            self._flush_timer = None

        # Flush remaining records
        if not self.buffer.is_empty():
            self.flush()

        # Update session; never overwrite a failure recorded earlier.
        if self.session.status != StreamStatus.FAILED:
            self.session.status = StreamStatus.COMPLETED
        self.session.updated_at = datetime.now()
        self.metrics.finish()

        # Write final state
        self._write_session_state()
        self._finalize()

        self._closed = True
560
+
561
@abstractmethod
def _finalize(self) -> None:
    """Run backend-specific finalization (e.g., create a manifest).

    Concrete writers implement this hook; ``close`` calls it after the
    final session state has been written.
    """
565
+
566
def __enter__(self) -> "BaseStreamWriter":
    """Enter the ``with`` block.

    Starts the metrics, marks the session as active and hands back the
    writer itself so it can be bound with ``as``.
    """
    self.metrics.start()
    self.session.status = StreamStatus.ACTIVE
    return self
571
+
572
def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
    """Leave the ``with`` block, closing the writer in every case.

    When the body raised, the session is first marked as failed and the
    exception text is recorded in the metrics. Returns ``None``, so the
    exception is never suppressed.
    """
    body_raised = exc_type is not None
    if body_raised:
        self.session.status = StreamStatus.FAILED
        self.metrics.record_error(str(exc_val))
    self.close()
578
+
579
+
580
+ # =============================================================================
581
+ # Concrete Writers
582
+ # =============================================================================
583
+
584
+
585
class StreamingResultWriter(BaseStreamWriter):
    """Filesystem-based streaming writer.

    Each chunk is written to its own file under ``<base_path>/<run_id>/``
    and the session state is kept in ``_manifest.json`` in that same
    directory. Both kinds of file are written to a temporary sibling first
    and then moved into place, so a reader never sees a partial file.
    """

    def __init__(
        self,
        session: StreamSession,
        config: StreamingConfig,
        base_path: Path | str,
        serializer: RecordSerializer | None = None,
        compressor: Compressor | None = None,
    ):
        """Initialize the filesystem writer.

        Creates ``base_path`` and the per-run directory if they are missing.

        Args:
            session: The streaming session.
            config: Streaming configuration.
            base_path: Base directory for writing chunks.
            serializer: Record serializer.
            compressor: Data compressor.
        """
        super().__init__(session, config, serializer, compressor)
        self.base_path = Path(base_path)

        # Create run directory; parents=True also creates base_path itself.
        self.run_path = self.base_path / session.run_id
        self.run_path.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _atomic_write(target: Path, payload: bytes) -> None:
        """Write ``payload`` to ``target`` via a temporary sibling file.

        The temporary file is removed again if anything fails, and the
        original exception is re-raised.
        """
        temp_path = target.with_name(target.name + ".tmp")
        try:
            with open(temp_path, "wb") as f:
                f.write(payload)
            # replace() overwrites an existing target on every platform,
            # whereas rename() fails on Windows when the target exists
            # (e.g. when a retried write had already landed the file).
            temp_path.replace(target)
        except Exception:
            temp_path.unlink(missing_ok=True)
            raise

    def _get_chunk_path(self, chunk_info: ChunkInfo) -> Path:
        """Get the file path for a chunk.

        The name is the chunk id followed by the format extension (falling
        back to ``.jsonl`` for formats not listed) and the compressor's
        extension.
        """
        ext = {
            StreamingFormat.JSONL: ".jsonl",
            StreamingFormat.NDJSON: ".ndjson",
            StreamingFormat.CSV: ".csv",
            StreamingFormat.PARQUET: ".parquet",
        }.get(self.config.format, ".jsonl")

        ext += self.compressor.get_extension()
        return self.run_path / f"{chunk_info.chunk_id}{ext}"

    def _write_chunk(self, chunk_info: ChunkInfo, data: bytes) -> None:
        """Write a chunk to the filesystem.

        Records the destination in ``chunk_info.path`` before writing.

        Args:
            chunk_info: Chunk metadata; its ``path`` attribute is updated.
            data: Compressed chunk data.
        """
        chunk_path = self._get_chunk_path(chunk_info)
        chunk_info.path = str(chunk_path)
        self._atomic_write(chunk_path, data)

    def _write_session_state(self) -> None:
        """Write session state to the ``_manifest.json`` file.

        The manifest is what a recovery would read, so it is replaced
        atomically: an interrupted write must not leave truncated JSON
        behind in place of the previous checkpoint.
        """
        manifest_path = self.run_path / "_manifest.json"
        # default=str stringifies any value json cannot encode natively.
        manifest = json.dumps(self.session.to_dict(), indent=2, default=str)
        self._atomic_write(manifest_path, manifest.encode("utf-8"))

    def _finalize(self) -> None:
        """Create final manifest."""
        # Already handled in _write_session_state
        pass
654
+
655
+
656
class BufferedStreamWriter(BaseStreamWriter):
    """In-memory buffered writer for testing or small results.

    Every chunk is appended to ``output`` as soon as it is written; session
    state is not persisted and closing performs no extra work.
    """

    def __init__(
        self,
        session: StreamSession,
        config: StreamingConfig,
        output: BinaryIO | None = None,
    ):
        """Initialize the buffered writer.

        Args:
            session: The streaming session.
            config: Streaming configuration.
            output: Output stream (BytesIO created if None).
        """
        super().__init__(session, config)
        # Test for None explicitly: a caller-supplied stream that happens to
        # evaluate falsy must not be silently swapped for a fresh BytesIO.
        self.output = BytesIO() if output is None else output
        # NOTE(review): nothing visible in this class populates this list;
        # kept because code outside this view may read it.
        self._all_records: list[dict[str, Any]] = []

    def _write_chunk(self, chunk_info: ChunkInfo, data: bytes) -> None:
        """Append the chunk data to the output stream.

        Args:
            chunk_info: Chunk metadata (unused by this backend).
            data: Compressed chunk data.
        """
        self.output.write(data)

    def _write_session_state(self) -> None:
        """No-op for buffered writer."""
        pass

    def _finalize(self) -> None:
        """No-op for buffered writer."""
        pass

    def get_output(self) -> bytes:
        """Get the accumulated output.

        Returns:
            Everything written so far when the output is a ``BytesIO``;
            ``b""`` for any other kind of stream, whose contents are not
            read back here.
        """
        if isinstance(self.output, BytesIO):
            return self.output.getvalue()
        return b""
696
+
697
+
698
class AsyncStreamWriter:
    """Async wrapper for streaming writers.

    Provides an async interface for any BaseStreamWriter. Each blocking
    call on the wrapped writer runs in the default executor of the event
    loop that is running when the coroutine is awaited, so the wrapper may
    be constructed before any loop has been started.
    """

    def __init__(self, writer: "BaseStreamWriter"):
        """Initialize the async writer.

        Args:
            writer: The underlying synchronous writer.
        """
        self._writer = writer

    async def _run_blocking(self, func: Any, *args: Any) -> Any:
        """Run ``func(*args)`` in the running loop's default executor."""
        # Resolve the loop at call time: a loop captured in __init__ can be
        # a different one from the loop that later awaits the future, and
        # awaiting a future bound to another loop raises RuntimeError.
        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(None, func, *args)

    async def write(self, record: dict[str, Any]) -> None:
        """Write a single record asynchronously."""
        await self._run_blocking(self._writer.write, record)

    async def write_result(self, result: "ValidatorResult") -> None:
        """Write a ValidatorResult asynchronously."""
        await self._run_blocking(self._writer.write_result, result)

    async def write_batch(self, records: list[dict[str, Any]]) -> None:
        """Write a batch of records asynchronously."""
        await self._run_blocking(self._writer.write_batch, records)

    async def write_results(self, results: list["ValidatorResult"]) -> None:
        """Write a batch of ValidatorResults asynchronously."""
        await self._run_blocking(self._writer.write_results, results)

    async def flush(self) -> "ChunkInfo":
        """Flush buffered records asynchronously.

        Returns:
            Whatever the wrapped writer's ``flush`` returns.
        """
        return await self._run_blocking(self._writer.flush)

    async def close(self) -> None:
        """Close the writer asynchronously."""
        await self._run_blocking(self._writer.close)

    async def __aenter__(self) -> "AsyncStreamWriter":
        """Async context manager entry: start metrics, mark session active."""
        self._writer.metrics.start()
        self._writer.session.status = StreamStatus.ACTIVE
        return self

    async def __aexit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        """Async context manager exit.

        Marks the session failed and records the error when the body
        raised, then closes the wrapped writer in every case.
        """
        if exc_type is not None:
            self._writer.session.status = StreamStatus.FAILED
            self._writer.metrics.record_error(str(exc_val))
        await self.close()

    @property
    def session(self) -> "StreamSession":
        """Get the streaming session."""
        return self._writer.session

    @property
    def metrics(self) -> "StreamingMetrics":
        """Get the streaming metrics."""
        return self._writer.metrics
759
+
760
+
761
+ # =============================================================================
762
+ # Factory Functions
763
+ # =============================================================================
764
+
765
+
766
def create_stream_writer(
    session: "StreamSession",
    config: "StreamingConfig",
    backend: str = "filesystem",
    **kwargs: Any,
) -> "BaseStreamWriter":
    """Create a streaming writer for the specified backend.

    Args:
        session: The streaming session.
        config: Streaming configuration.
        backend: Storage backend. This factory builds ``"filesystem"`` and
            ``"memory"`` writers; any other name is rejected.
        **kwargs: Backend-specific options.
            ``"filesystem"``: ``base_path`` (defaults to
            ``".truthound/streaming"``), ``serializer``, ``compressor``.
            ``"memory"``: ``output``.

    Returns:
        A streaming writer instance.

    Raises:
        ValueError: If ``backend`` is not one of the supported names.
    """
    if backend == "filesystem":
        return StreamingResultWriter(
            session,
            config,
            kwargs.get("base_path", ".truthound/streaming"),
            # Forward these instead of dropping them; None keeps the
            # writer's own defaults.
            serializer=kwargs.get("serializer"),
            compressor=kwargs.get("compressor"),
        )
    if backend == "memory":
        return BufferedStreamWriter(session, config, kwargs.get("output"))
    raise ValueError(f"Unsupported backend: {backend}")