truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877) hide show
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,868 @@
1
+ """Compression pipeline for chained transformations.
2
+
3
+ This module provides a pipeline architecture for composing multiple
4
+ compression stages and data transformations.
5
+
6
+ Example:
7
+ >>> from truthound.stores.compression import (
8
+ ... CompressionPipeline,
9
+ ... PipelineStage,
10
+ ... DeduplicationTransform,
11
+ ... GzipCompressor,
12
+ ... )
13
+ >>>
14
+ >>> pipeline = (
15
+ ... CompressionPipeline()
16
+ ... .add_transform(DeduplicationTransform())
17
+ ... .add_compression(GzipCompressor())
18
+ ... )
19
+ >>>
20
+ >>> result = pipeline.process(data)
21
+ >>> original = pipeline.reverse(result.data)
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import hashlib
27
+ import struct
28
+ from abc import ABC, abstractmethod
29
+ from dataclasses import dataclass, field
30
+ from datetime import datetime
31
+ from enum import Enum, auto
32
+ from typing import Any, Callable, Generic, TypeVar
33
+
34
+ from truthound.stores.compression.base import (
35
+ CompressionAlgorithm,
36
+ CompressionConfig,
37
+ CompressionError,
38
+ CompressionMetrics,
39
+ CompressionResult,
40
+ Compressor,
41
+ Decompressor,
42
+ )
43
+
44
+
45
+ # =============================================================================
46
+ # Exceptions
47
+ # =============================================================================
48
+
49
+
50
class PipelineError(CompressionError):
    """Raised when processing inside the compression pipeline fails.

    Attributes:
        stage: Name of the stage the failure is attributed to, or ``None``
            when no stage was given.
    """

    def __init__(self, message: str, stage: str | None = None) -> None:
        """Build the error, prefixing the message with the stage name if any.

        Args:
            message: Human-readable description of the failure.
            stage: Optional name of the stage that failed. A falsy value
                (``None`` or an empty string) leaves the message unprefixed.
        """
        self.stage = stage
        text = f"[Stage: {stage}] {message}" if stage else message
        super().__init__(text)
58
+
59
+
60
class TransformError(PipelineError):
    """Raised when a transform fails to apply or reverse its operation.

    Adds no behavior of its own; it exists so transform failures can be
    told apart from other pipeline errors.
    """
64
+
65
+
66
+ # =============================================================================
67
+ # Enums
68
+ # =============================================================================
69
+
70
+
71
class StageType(Enum):
    """Kinds of stage a pipeline can contain."""

    # Data transformation (dedup, delta, etc.).
    TRANSFORM = auto()
    # Compression stage.
    COMPRESS = auto()
    # Encryption stage (placeholder).
    ENCRYPT = auto()
    # Checksum/validation stage.
    CHECKSUM = auto()
78
+
79
+
80
class TransformDirection(Enum):
    """Which way a transformation is being run."""

    # Apply the transform.
    FORWARD = auto()
    # Undo the transform.
    REVERSE = auto()
85
+
86
+
87
+ # =============================================================================
88
+ # Transform Protocols and Base Classes
89
+ # =============================================================================
90
+
91
+
92
# Type variable constrained to the two byte-buffer types: bytes or bytearray.
T = TypeVar("T", bytes, bytearray)
93
+
94
+
95
class Transform(ABC):
    """Abstract base class for byte-level data transformations.

    A transform rewrites data before it is compressed and is expected to be
    able to undo that rewrite. Typical examples are deduplication, delta
    encoding and dictionary preprocessing.

    Subclasses must provide ``name``, ``apply`` and ``reverse``.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Return the identifier of this transform."""
        ...

    @property
    def is_reversible(self) -> bool:
        """Report whether ``reverse`` can restore the original data.

        The base implementation always answers ``True``.
        """
        return True

    @abstractmethod
    def apply(self, data: bytes) -> bytes:
        """Run the transformation forwards.

        Args:
            data: Input data.

        Returns:
            The transformed data.
        """
        ...

    @abstractmethod
    def reverse(self, data: bytes) -> bytes:
        """Undo the transformation.

        Args:
            data: Data previously produced by ``apply``.

        Returns:
            The original data.
        """
        ...

    def get_stats(self) -> dict[str, Any]:
        """Return statistics for this transform.

        The base implementation reports only the transform's name under the
        ``"name"`` key.
        """
        stats: dict[str, Any] = {"name": self.name}
        return stats
141
+
142
+
143
+ # =============================================================================
144
+ # Built-in Transforms
145
+ # =============================================================================
146
+
147
+
148
class IdentityTransform(Transform):
    """No-op transform: both directions hand the input back untouched."""

    @property
    def name(self) -> str:
        """Return the identifier ``"identity"``."""
        return "identity"

    def apply(self, data: bytes) -> bytes:
        """Return ``data`` unchanged."""
        return data

    def reverse(self, data: bytes) -> bytes:
        """Return ``data`` unchanged."""
        return data
160
+
161
+
162
class DeduplicationTransform(Transform):
    """Block-level deduplication transform.

    Splits the input into fixed-size blocks, stores every distinct block
    once, and records for each original block position the index of the
    stored block. Effective for data with repeated patterns.

    Deduplicated format (integers are little-endian unsigned 32-bit):
        [4 bytes: magic b"DDUP"]
        [4 bytes: block_size]
        [4 bytes: num_unique_blocks]
        [4 bytes: num_references]
        [4 bytes: last_block_size]
        [unique blocks: 4-byte length followed by the block bytes, each]
        [references: one 4-byte block index per original block position]

    Passthrough format (input too small, or too little duplication):
        [4 bytes: magic b"PASS"]
        [4 bytes: payload length]
        [payload]
    """

    def __init__(self, block_size: int = 4096) -> None:
        """Initialize deduplication transform.

        Args:
            block_size: Size of blocks for deduplication, in bytes.

        Raises:
            ValueError: If ``block_size`` is not positive.
        """
        if block_size <= 0:
            raise ValueError(f"block_size must be positive, got {block_size}")
        self.block_size = block_size
        self._stats: dict[str, Any] = {}

    @property
    def name(self) -> str:
        """Return the identifier ``"deduplication"``."""
        return "deduplication"

    def apply(self, data: bytes) -> bytes:
        """Apply deduplication.

        Args:
            data: Input data.

        Returns:
            The deduplicated encoding, or a passthrough wrapper when the
            input is shorter than two blocks or at least 90% of its blocks
            are unique.
        """
        size = self.block_size
        if len(data) < size * 2:
            # Too small to benefit from dedup
            return self._wrap_passthrough(data)

        # bytes() is a no-op for bytes slices and makes slices of other
        # buffer types (e.g. bytearray) hashable.
        blocks = [bytes(data[i : i + size]) for i in range(0, len(data), size)]

        # Key the lookup on the block content itself rather than on a digest:
        # a digest collision would silently substitute the wrong block, and
        # the dict only holds references to blocks already kept in
        # unique_blocks, so this costs no extra memory.
        index_of: dict[bytes, int] = {}
        unique_blocks: list[bytes] = []
        references: list[int] = []
        for block in blocks:
            idx = index_of.get(block)
            if idx is None:
                idx = len(unique_blocks)
                index_of[block] = idx
                unique_blocks.append(block)
            references.append(idx)

        dedup_ratio = len(unique_blocks) / len(blocks) if blocks else 1.0
        self._stats = {
            "original_blocks": len(blocks),
            "unique_blocks": len(unique_blocks),
            "dedup_ratio": dedup_ratio,
            "space_saved_percent": (1 - dedup_ratio) * 100,
        }

        # If no significant dedup, return passthrough
        if len(unique_blocks) >= len(blocks) * 0.9:
            return self._wrap_passthrough(data)

        last_block_size = len(blocks[-1]) if blocks else 0
        parts: list[bytes] = [
            struct.pack(
                "<4sIIII",
                b"DDUP",
                size,
                len(unique_blocks),
                len(references),
                last_block_size,
            )
        ]
        for block in unique_blocks:
            parts.append(struct.pack("<I", len(block)))
            parts.append(block)
        parts.append(struct.pack(f"<{len(references)}I", *references))
        return b"".join(parts)

    def reverse(self, data: bytes) -> bytes:
        """Reverse deduplication.

        Args:
            data: Output of ``apply`` (either format).

        Returns:
            The original data.

        Raises:
            TransformError: If the magic is unknown or the data is truncated
                or otherwise inconsistent.
        """
        if len(data) < 4:
            raise TransformError("Invalid dedup data: too short", self.name)

        magic = data[:4]
        if magic == b"PASS":
            return self._unwrap_passthrough(data)
        if magic != b"DDUP":
            raise TransformError("Invalid dedup magic", self.name)

        try:
            # The stored block_size is not needed: every block carries its
            # own length prefix.
            _block_size, num_unique, num_refs, last_block_size = struct.unpack_from(
                "<IIII", data, 4
            )
            offset = 20

            unique_blocks: list[bytes] = []
            for _ in range(num_unique):
                (block_len,) = struct.unpack_from("<I", data, offset)
                offset += 4
                block = data[offset : offset + block_len]
                if len(block) != block_len:
                    raise TransformError(
                        "Invalid dedup data: truncated block", self.name
                    )
                unique_blocks.append(block)
                offset += block_len

            references = struct.unpack_from(f"<{num_refs}I", data, offset)
        except struct.error as exc:
            raise TransformError("Invalid dedup data: truncated", self.name) from exc

        output = bytearray()
        last_position = num_refs - 1
        for position, ref in enumerate(references):
            if ref >= len(unique_blocks):
                raise TransformError(
                    f"Invalid dedup data: block reference {ref} out of range",
                    self.name,
                )
            block = unique_blocks[ref]
            # Last block may be shorter
            if position == last_position and last_block_size > 0:
                block = block[:last_block_size]
            output.extend(block)

        return bytes(output)

    def _wrap_passthrough(self, data: bytes) -> bytes:
        """Wrap data as passthrough (no dedup applied)."""
        return b"PASS" + struct.pack("<I", len(data)) + data

    def _unwrap_passthrough(self, data: bytes) -> bytes:
        """Extract the payload of a passthrough wrapper, validating lengths."""
        if len(data) < 8:
            raise TransformError("Invalid dedup data: truncated header", self.name)
        (size,) = struct.unpack_from("<I", data, 4)
        payload = data[8 : 8 + size]
        if len(payload) != size:
            raise TransformError("Invalid dedup data: truncated payload", self.name)
        return payload

    def get_stats(self) -> dict[str, Any]:
        """Return the base stats merged with the figures of the last dedup run."""
        return {**super().get_stats(), **self._stats}
293
+
294
+
295
class DeltaEncodingTransform(Transform):
    """Delta encoding transform for numerical data.

    Stores the byte-wise difference (modulo 256) between consecutive bytes
    instead of the bytes themselves. Effective for time-series or sequential
    numerical data.

    Format:
        [4 bytes: magic 'DLTA']
        [4 bytes: original_length, little-endian]
        [first_byte]
        [delta_bytes...]

    Inputs shorter than two bytes are stored as passthrough:
        [4 bytes: magic 'PASS']
        [4 bytes: payload length, little-endian]
        [payload]
    """

    @property
    def name(self) -> str:
        """Return the identifier ``"delta_encoding"``."""
        return "delta_encoding"

    def apply(self, data: bytes) -> bytes:
        """Apply delta encoding.

        Args:
            data: Input data.

        Returns:
            The delta-encoded data, or a passthrough wrapper for inputs
            shorter than two bytes.
        """
        if len(data) < 2:
            return self._wrap_passthrough(data)

        deltas = bytes((cur - prev) & 0xFF for prev, cur in zip(data, data[1:]))
        return b"DLTA" + struct.pack("<I", len(data)) + bytes(data[:1]) + deltas

    def reverse(self, data: bytes) -> bytes:
        """Reverse delta encoding.

        Args:
            data: Output of ``apply`` (either format).

        Returns:
            The original data.

        Raises:
            TransformError: If the magic is unknown or the data is shorter
                than its header declares.
        """
        if len(data) < 4:
            raise TransformError("Invalid delta data: too short", self.name)

        magic = data[:4]
        # Check for passthrough
        if magic == b"PASS":
            return self._unwrap_passthrough(data)
        if magic != b"DLTA":
            raise TransformError("Invalid delta magic", self.name)
        if len(data) < 8:
            raise TransformError("Invalid delta data: truncated header", self.name)

        (original_length,) = struct.unpack_from("<I", data, 4)
        payload = data[8 : 8 + original_length]
        if len(payload) != original_length:
            raise TransformError("Invalid delta data: truncated payload", self.name)

        # Running sum modulo 256; starting from 0 makes the first payload
        # byte (the stored first_byte) come out unchanged.
        output = bytearray(original_length)
        value = 0
        for position, delta in enumerate(payload):
            value = (value + delta) & 0xFF
            output[position] = value

        return bytes(output)

    def _wrap_passthrough(self, data: bytes) -> bytes:
        """Wrap data as passthrough."""
        return b"PASS" + struct.pack("<I", len(data)) + data

    def _unwrap_passthrough(self, data: bytes) -> bytes:
        """Extract the payload of a passthrough wrapper, validating lengths."""
        if len(data) < 8:
            raise TransformError("Invalid delta data: truncated header", self.name)
        (size,) = struct.unpack_from("<I", data, 4)
        payload = data[8 : 8 + size]
        if len(payload) != size:
            raise TransformError("Invalid delta data: truncated payload", self.name)
        return payload
354
+
355
+
356
class RunLengthTransform(Transform):
    """Run-length encoding transform.

    Compresses consecutive repeated bytes.
    Effective for data with many repeated values.

    Format:
        [4 bytes: magic 'RLNC']
        [4 bytes: original_length, little-endian]
        [encoded_data: a sequence of records, each either
            run:     0x00, run_length (1 byte), value (1 byte)
            literal: 0x80 | length (length 1..127), followed by the bytes]

    Inputs shorter than two bytes are stored as passthrough:
        [4 bytes: magic 'PASS']
        [4 bytes: payload length, little-endian]
        [payload]
    """

    def __init__(self, min_run: int = 4) -> None:
        """Initialize RLE transform.

        Args:
            min_run: Minimum run length to encode.
        """
        self.min_run = min_run

    @property
    def name(self) -> str:
        """Return the identifier ``"run_length"``."""
        return "run_length"

    @staticmethod
    def _run_length_at(data: bytes, start: int) -> int:
        """Count equal bytes starting at ``start``, capped at 255."""
        value = data[start]
        limit = min(len(data), start + 255)
        position = start + 1
        while position < limit and data[position] == value:
            position += 1
        return position - start

    def apply(self, data: bytes) -> bytes:
        """Apply run-length encoding.

        Args:
            data: Input data.

        Returns:
            The run-length encoded data, or a passthrough wrapper for inputs
            shorter than two bytes.
        """
        if len(data) < 2:
            return self._wrap_passthrough(data)

        output = bytearray(b"RLNC")
        output.extend(struct.pack("<I", len(data)))

        total = len(data)
        i = 0
        while i < total:
            run_length = self._run_length_at(data, i)
            if run_length >= self.min_run:
                # Encode as run: marker 0, count, value
                output.append(0)
                output.append(run_length)
                output.append(data[i])
                i += run_length
                continue

            # Collect literal bytes until an encodable run starts, the input
            # ends, or the 7-bit length field (127) is exhausted.
            literal_start = i
            while i < total:
                if self._run_length_at(data, i) >= self.min_run:
                    break
                i += 1
                if i - literal_start >= 127:
                    break

            output.append((i - literal_start) | 0x80)  # High bit = literal
            output.extend(data[literal_start:i])

        return bytes(output)

    def reverse(self, data: bytes) -> bytes:
        """Reverse run-length encoding.

        Args:
            data: Output of ``apply`` (either format).

        Returns:
            The original data.

        Raises:
            TransformError: If the magic is unknown, a record is truncated,
                an unknown marker is encountered, or the stream decodes to
                fewer bytes than the header declares.
        """
        if len(data) < 4:
            raise TransformError("Invalid RLE data: too short", self.name)

        magic = data[:4]
        if magic == b"PASS":
            return self._unwrap_passthrough(data)
        if magic != b"RLNC":
            raise TransformError("Invalid RLE magic", self.name)
        if len(data) < 8:
            raise TransformError("Invalid RLE data: truncated header", self.name)

        (original_length,) = struct.unpack_from("<I", data, 4)
        output = bytearray()
        end = len(data)
        i = 8

        while i < end and len(output) < original_length:
            marker = data[i]
            i += 1

            if marker == 0:
                # Run: count byte followed by the repeated value
                if i + 2 > end:
                    raise TransformError("Invalid RLE data: truncated run", self.name)
                output.extend(data[i + 1 : i + 2] * data[i])
                i += 2
            elif marker & 0x80:
                # Literal: low 7 bits give the number of raw bytes that follow
                literal_len = marker & 0x7F
                literal = data[i : i + literal_len]
                if len(literal) != literal_len:
                    raise TransformError(
                        "Invalid RLE data: truncated literal", self.name
                    )
                output.extend(literal)
                i += literal_len
            else:
                # apply() only ever emits 0x00 or high-bit markers.
                raise TransformError(
                    f"Invalid RLE data: unknown marker 0x{marker:02x}", self.name
                )

        if len(output) < original_length:
            raise TransformError("Invalid RLE data: truncated stream", self.name)

        return bytes(output[:original_length])

    def _wrap_passthrough(self, data: bytes) -> bytes:
        """Wrap data as passthrough."""
        return b"PASS" + struct.pack("<I", len(data)) + data

    def _unwrap_passthrough(self, data: bytes) -> bytes:
        """Extract the payload of a passthrough wrapper, validating lengths."""
        if len(data) < 8:
            raise TransformError("Invalid RLE data: truncated header", self.name)
        (size,) = struct.unpack_from("<I", data, 4)
        payload = data[8 : 8 + size]
        if len(payload) != size:
            raise TransformError("Invalid RLE data: truncated payload", self.name)
        return payload
461
+
462
+
463
+ # =============================================================================
464
+ # Pipeline Stage
465
+ # =============================================================================
466
+
467
+
468
@dataclass
class PipelineStage:
    """One step of the compression pipeline.

    Attributes:
        name: Stage identifier; also used to label errors raised by the stage.
        stage_type: Kind of stage (transform, compress, etc.).
        processor: Object doing the work. Transform stages call its
            ``apply``/``reverse``; compress stages call its
            ``compress``/``decompress``.
        enabled: When false, the stage hands data through untouched.
        config: Optional stage-specific configuration.
    """

    name: str
    stage_type: StageType
    processor: Transform | Compressor | Any
    enabled: bool = True
    config: dict[str, Any] = field(default_factory=dict)

    def process(self, data: bytes) -> bytes:
        """Run ``data`` forwards through this stage.

        Stage types other than transform, compress and checksum return the
        data unchanged.
        """
        if not self.enabled:
            return data

        kind = self.stage_type
        if kind == StageType.TRANSFORM:
            return self.processor.apply(data)
        if kind == StageType.COMPRESS:
            return self.processor.compress(data)
        if kind == StageType.CHECKSUM:
            return self._add_checksum(data)
        return data

    def reverse(self, data: bytes) -> bytes:
        """Undo this stage's processing of ``data``.

        Stage types other than transform, compress and checksum return the
        data unchanged.
        """
        if not self.enabled:
            return data

        kind = self.stage_type
        if kind == StageType.TRANSFORM:
            return self.processor.reverse(data)
        if kind == StageType.COMPRESS:
            return self.processor.decompress(data)
        if kind == StageType.CHECKSUM:
            return self._verify_checksum(data)
        return data

    def _add_checksum(self, data: bytes) -> bytes:
        """Prefix ``data`` with its 32-byte SHA-256 digest."""
        return hashlib.sha256(data).digest() + data

    def _verify_checksum(self, data: bytes) -> bytes:
        """Check the leading SHA-256 digest and return the payload after it.

        Raises:
            PipelineError: If ``data`` is shorter than a digest or the digest
                does not match the payload.
        """
        if len(data) < 32:
            raise PipelineError("Data too short for checksum", self.name)

        stored_digest, payload = data[:32], data[32:]
        if hashlib.sha256(payload).digest() != stored_digest:
            raise PipelineError("Checksum verification failed", self.name)

        return payload
532
+
533
+
534
+ # =============================================================================
535
+ # Pipeline Metrics
536
+ # =============================================================================
537
+
538
+
539
@dataclass
class PipelineMetrics:
    """Size and timing figures collected while a pipeline runs.

    Attributes:
        total_time_ms: Total processing time in milliseconds.
        stage_metrics: Per-stage figures, keyed by stage name.
        input_size: Size of the original input.
        output_size: Size of the final output.
        overall_ratio: ``input_size / output_size``; stays at its previous
            value until ``update_totals`` is called with a positive
            ``output_size``.
    """

    total_time_ms: float = 0.0
    stage_metrics: dict[str, dict[str, Any]] = field(default_factory=dict)
    input_size: int = 0
    output_size: int = 0
    overall_ratio: float = 0.0

    def add_stage_metric(self, stage_name: str, input_size: int, output_size: int, time_ms: float) -> None:
        """Record the figures for one stage.

        An existing entry with the same ``stage_name`` is replaced. The
        stored ratio is ``input_size / output_size``, or ``0.0`` when the
        stage produced no output.
        """
        ratio = 0.0
        if output_size > 0:
            ratio = input_size / output_size

        entry: dict[str, Any] = {
            "input_size": input_size,
            "output_size": output_size,
            "time_ms": time_ms,
            "ratio": ratio,
        }
        self.stage_metrics[stage_name] = entry

    def update_totals(self) -> None:
        """Recompute ``overall_ratio`` from the current sizes.

        Leaves ``overall_ratio`` untouched when ``output_size`` is not
        positive, which avoids dividing by zero.
        """
        produced = self.output_size
        if produced > 0:
            self.overall_ratio = self.input_size / produced

    def to_dict(self) -> dict[str, Any]:
        """Return the metrics as a plain dictionary.

        Time, ratio and savings are rounded to two decimals. ``"stages"``
        refers to the live ``stage_metrics`` mapping, not a copy.
        """
        savings = 0.0
        if self.input_size > 0:
            savings = round((1 - self.output_size / self.input_size) * 100, 2)

        summary: dict[str, Any] = {}
        summary["total_time_ms"] = round(self.total_time_ms, 2)
        summary["input_size"] = self.input_size
        summary["output_size"] = self.output_size
        summary["overall_ratio"] = round(self.overall_ratio, 2)
        summary["space_savings_percent"] = savings
        summary["stages"] = self.stage_metrics
        return summary
581
+
582
+
583
+ # =============================================================================
584
+ # Pipeline Result
585
+ # =============================================================================
586
+
587
+
588
@dataclass
class PipelineResult:
    """Outcome of a single pipeline run.

    Attributes:
        data: The processed bytes.
        metrics: Size and timing figures gathered during the run.
        stage_order: Names of the stages that were applied, in order.
        config_snapshot: Pipeline configuration captured at processing time.
    """

    data: bytes
    metrics: PipelineMetrics
    stage_order: list[str] = field(default_factory=list)
    config_snapshot: dict[str, Any] = field(default_factory=dict)
603
+
604
+
605
+ # =============================================================================
606
+ # Compression Pipeline
607
+ # =============================================================================
608
+
609
+
610
class CompressionPipeline:
    """Composable compression pipeline.

    Allows chaining multiple transforms and compression stages for
    optimal compression of different data types. Stages run in insertion
    order in ``process`` and in the opposite order in ``reverse``.

    Example:
        >>> pipeline = (
        ...     CompressionPipeline("json_pipeline")
        ...     .add_transform(DeduplicationTransform())
        ...     .add_compression(get_compressor(CompressionAlgorithm.ZSTD))
        ...     .add_checksum()
        ... )
        >>>
        >>> result = pipeline.process(json_data)
        >>> original = pipeline.reverse(result.data)
    """

    def __init__(self, name: str = "default") -> None:
        """Initialize an empty pipeline.

        Args:
            name: Pipeline name for identification.
        """
        self.name = name
        self._stages: list[PipelineStage] = []
        self._enabled = True

    def add_stage(self, stage: PipelineStage) -> "CompressionPipeline":
        """Append a stage to the end of the pipeline.

        Args:
            stage: Pipeline stage to add.

        Returns:
            Self for chaining.
        """
        self._stages.append(stage)
        return self

    def add_transform(self, transform: Transform, name: str | None = None) -> "CompressionPipeline":
        """Add a transform stage.

        Args:
            transform: Transform to add.
            name: Optional stage name; falls back to ``transform.name``.

        Returns:
            Self for chaining.
        """
        stage_name = name or transform.name
        stage = PipelineStage(name=stage_name, stage_type=StageType.TRANSFORM, processor=transform)
        return self.add_stage(stage)

    def add_compression(self, compressor: Compressor, name: str | None = None) -> "CompressionPipeline":
        """Add a compression stage.

        Args:
            compressor: Compressor to add.
            name: Optional stage name; falls back to
                ``compress_<algorithm value>``.

        Returns:
            Self for chaining.
        """
        stage_name = name or f"compress_{compressor.algorithm.value}"
        stage = PipelineStage(name=stage_name, stage_type=StageType.COMPRESS, processor=compressor)
        return self.add_stage(stage)

    def add_checksum(self, name: str = "checksum") -> "CompressionPipeline":
        """Add a checksum verification stage.

        Args:
            name: Stage name.

        Returns:
            Self for chaining.
        """
        # Checksum stages carry no processor; the stage does the hashing itself.
        stage = PipelineStage(name=name, stage_type=StageType.CHECKSUM, processor=None)
        return self.add_stage(stage)

    def process(self, data: bytes) -> PipelineResult:
        """Process data through every enabled stage, in insertion order.

        Per-stage metrics are keyed by stage name, so two enabled stages
        sharing a name leave only the later one's entry in the metrics.

        Args:
            data: Input data.

        Returns:
            Pipeline result with processed data and metrics.

        Raises:
            PipelineError: If any stage raises; the original exception is
                attached as ``__cause__``.
        """
        import time

        metrics = PipelineMetrics()
        metrics.input_size = len(data)
        stage_order: list[str] = []

        start_total = time.perf_counter()
        current_data = data

        for stage in self._stages:
            if not stage.enabled:
                continue

            stage_start = time.perf_counter()
            input_size = len(current_data)

            try:
                current_data = stage.process(current_data)
            except Exception as e:
                # Chain explicitly so the traceback shows the stage's own
                # failure as the direct cause of the pipeline error.
                raise PipelineError(f"Stage failed: {e}", stage.name) from e

            stage_time = (time.perf_counter() - stage_start) * 1000
            metrics.add_stage_metric(stage.name, input_size, len(current_data), stage_time)
            stage_order.append(stage.name)

        metrics.total_time_ms = (time.perf_counter() - start_total) * 1000
        metrics.output_size = len(current_data)
        metrics.update_totals()

        return PipelineResult(
            data=current_data,
            metrics=metrics,
            stage_order=stage_order,
            config_snapshot=self._get_config_snapshot(),
        )

    def reverse(self, data: bytes) -> bytes:
        """Reverse pipeline processing.

        Enabled stages are undone in the opposite order to ``process``.

        Args:
            data: Processed data.

        Returns:
            Original data.

        Raises:
            PipelineError: If any stage raises while reversing; the original
                exception is attached as ``__cause__``.
        """
        current_data = data

        # Undo the last-applied stage first.
        for stage in reversed(self._stages):
            if not stage.enabled:
                continue

            try:
                current_data = stage.reverse(current_data)
            except Exception as e:
                raise PipelineError(f"Reverse stage failed: {e}", stage.name) from e

        return current_data

    def enable_stage(self, name: str, enabled: bool = True) -> "CompressionPipeline":
        """Enable or disable a stage by name.

        Only the first stage with a matching name is changed; an unknown
        name is ignored.

        Args:
            name: Stage name.
            enabled: Whether to enable the stage.

        Returns:
            Self for chaining.
        """
        for stage in self._stages:
            if stage.name == name:
                stage.enabled = enabled
                break
        return self

    def remove_stage(self, name: str) -> "CompressionPipeline":
        """Remove every stage with the given name.

        Args:
            name: Stage name to remove.

        Returns:
            Self for chaining.
        """
        self._stages = [s for s in self._stages if s.name != name]
        return self

    @property
    def stages(self) -> list[PipelineStage]:
        """Get all stages as a shallow copy of the internal list."""
        return self._stages.copy()

    def _get_config_snapshot(self) -> dict[str, Any]:
        """Get the pipeline name plus each stage's name, type and enabled flag."""
        return {
            "name": self.name,
            "stages": [
                {
                    "name": s.name,
                    "type": s.stage_type.name,
                    "enabled": s.enabled,
                }
                for s in self._stages
            ],
        }

    def clone(self, new_name: str | None = None) -> "CompressionPipeline":
        """Create a copy of this pipeline with deep-copied stages.

        Args:
            new_name: Name for the cloned pipeline; defaults to
                ``<name>_clone``.

        Returns:
            Cloned pipeline.
        """
        import copy

        cloned = CompressionPipeline(new_name or f"{self.name}_clone")
        cloned._stages = copy.deepcopy(self._stages)
        return cloned
819
+
820
+
821
+ # =============================================================================
822
+ # Pre-built Pipelines
823
+ # =============================================================================
824
+
825
+
826
def create_text_pipeline() -> CompressionPipeline:
    """Build the ``"text"`` pipeline: a single GZIP compression stage."""
    from truthound.stores.compression.providers import get_compressor

    pipeline = CompressionPipeline("text")
    gzip_compressor = get_compressor(CompressionAlgorithm.GZIP)
    return pipeline.add_compression(gzip_compressor)
831
+
832
+
833
def create_json_pipeline() -> CompressionPipeline:
    """Build the ``"json"`` pipeline: deduplication followed by GZIP.

    The deduplication transform is created with ``block_size=1024``.
    """
    from truthound.stores.compression.providers import get_compressor

    pipeline = CompressionPipeline("json")
    pipeline = pipeline.add_transform(DeduplicationTransform(block_size=1024))
    pipeline = pipeline.add_compression(get_compressor(CompressionAlgorithm.GZIP))
    return pipeline
840
+
841
+
842
def create_binary_pipeline() -> CompressionPipeline:
    """Build the ``"binary"`` pipeline.

    Stages, in order: run-length transform, deduplication transform, then
    compression with ZSTD when it is available and GZIP otherwise.
    """
    from truthound.stores.compression.providers import get_compressor, is_algorithm_available

    # ZSTD is preferred; GZIP is the fallback when ZSTD is not available.
    if is_algorithm_available(CompressionAlgorithm.ZSTD):
        algo = CompressionAlgorithm.ZSTD
    else:
        algo = CompressionAlgorithm.GZIP

    pipeline = CompressionPipeline("binary")
    pipeline = pipeline.add_transform(RunLengthTransform())
    pipeline = pipeline.add_transform(DeduplicationTransform())
    pipeline = pipeline.add_compression(get_compressor(algo))
    return pipeline
855
+
856
+
857
def create_timeseries_pipeline() -> CompressionPipeline:
    """Build the ``"timeseries"`` pipeline.

    Stages, in order: delta-encoding transform, then compression with ZSTD
    when it is available and GZIP otherwise.
    """
    from truthound.stores.compression.providers import get_compressor, is_algorithm_available

    # ZSTD is preferred; GZIP is the fallback when ZSTD is not available.
    if is_algorithm_available(CompressionAlgorithm.ZSTD):
        algo = CompressionAlgorithm.ZSTD
    else:
        algo = CompressionAlgorithm.GZIP

    pipeline = CompressionPipeline("timeseries")
    pipeline = pipeline.add_transform(DeltaEncodingTransform())
    pipeline = pipeline.add_compression(get_compressor(algo))
    return pipeline