truthound-1.0.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877)
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,1303 @@
1
+ """Caching layer for data profiling with file hash based caching.
2
+
3
+ This module provides a flexible caching system for profile results:
4
+ - File hash based cache key generation
5
+ - Multiple backend support (memory, file, Redis)
6
+ - TTL-based expiration
7
+ - Lazy evaluation with cache-through pattern
8
+
9
+ Key features:
10
+ - Pluggable backend architecture
11
+ - Content-based cache invalidation
12
+ - Compression support for large profiles
13
+ - Thread-safe operations
14
+
15
+ Example:
16
+ from truthound.profiler.caching import ProfileCache, FileHashCacheKey
17
+
18
+ # Create cache with memory backend
19
+ cache = ProfileCache()
20
+
21
+ # Generate cache key from file
22
+ key = FileHashCacheKey.from_file("data.parquet")
23
+
24
+ # Cache-through pattern
25
+ profile = cache.get_or_compute(key, lambda: expensive_profile())
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import gzip
31
+ import hashlib
32
+ import json
33
+ import os
34
+ import pickle
35
+ import shutil
36
+ import threading
37
+ import time
38
+ from abc import ABC, abstractmethod
39
+ from dataclasses import dataclass, field
40
+ from datetime import datetime, timedelta
41
+ from enum import Enum
42
+ from pathlib import Path
43
+ from typing import Any, Callable, Generic, Protocol, TypeVar
44
+
45
+ from truthound.profiler.base import TableProfile
46
+ from truthound.profiler.schema import ProfileSerializer
47
+
48
+
49
+ # =============================================================================
50
+ # Cache Key Protocol
51
+ # =============================================================================
52
+
53
+
54
class CacheKeyProtocol(Protocol):
    """Structural interface expected of every cache key.

    A key must be renderable as a string, hashable, and comparable for
    equality.
    """

    def to_string(self) -> str:
        """Return the string representation of this key."""

    def __hash__(self) -> int:
        """Return the hash value of this key."""

    def __eq__(self, other: object) -> bool:
        """Return whether ``other`` equals this key."""
66
+
67
+
68
@dataclass(frozen=True)
class CacheKey:
    """Immutable cache key built from a namespace, a version and a key string."""

    key: str
    namespace: str = "default"
    version: str = "1"

    def to_string(self) -> str:
        """Render the key as ``namespace:version:key``."""
        prefix = f"{self.namespace}:{self.version}"
        return f"{prefix}:{self.key}"

    def __hash__(self) -> int:
        # The hash is taken from the rendered string, so it always follows
        # whatever ``to_string`` produces.
        rendered = self.to_string()
        return hash(rendered)
82
+
83
+
84
@dataclass(frozen=True)
class FileHashCacheKey(CacheKey):
    """Cache key derived from the SHA-256 hash of a file's contents.

    With full hashing every byte of the file feeds the digest, so any content
    change produces a different key. With quick hashing only the head, the
    tail and the size of the file are hashed.

    Attributes:
        file_path: Path the key was built from, as a string
        file_hash: Hex SHA-256 digest (full or sampled)
        file_size: File size in bytes when the key was built
        file_mtime: File modification time (``st_mtime``) when the key was built
    """

    file_path: str = ""
    file_hash: str = ""
    file_size: int = 0
    file_mtime: float = 0.0

    @classmethod
    def from_file(
        cls,
        path: str | Path,
        *,
        namespace: str = "profile",
        version: str = "1",
        quick_hash: bool = False,
        sample_size: int = 1024 * 1024,  # 1MB sample for quick hash
    ) -> "FileHashCacheKey":
        """Create a cache key from a file on disk.

        Args:
            path: Path to the file
            namespace: Cache namespace
            version: Cache version
            quick_hash: If True, hash only the first and last ``sample_size``
                bytes plus the file size. Only applied when the file is
                larger than two samples and ``sample_size`` is positive;
                otherwise the whole file is hashed.
            sample_size: Bytes to read from each end when quick hashing

        Returns:
            FileHashCacheKey instance

        Raises:
            FileNotFoundError: If ``path`` does not exist
        """
        path = Path(path)
        if not path.exists():
            raise FileNotFoundError(f"File not found: {path}")

        stat = path.stat()
        file_size = stat.st_size
        file_mtime = stat.st_mtime

        # A non-positive sample would make the quick hash ignore the file
        # contents (or read the whole file through read(-n)), so such values
        # fall back to the full hash.
        use_quick = quick_hash and sample_size > 0 and file_size > sample_size * 2
        if use_quick:
            file_hash = cls._quick_hash(path, sample_size)
        else:
            file_hash = cls._full_hash(path)

        return cls(
            key=file_hash,
            namespace=namespace,
            version=version,
            file_path=str(path),
            file_hash=file_hash,
            file_size=file_size,
            file_mtime=file_mtime,
        )

    @staticmethod
    def _full_hash(path: Path, chunk_size: int = 8192) -> str:
        """Return the hex SHA-256 digest of the whole file, read in chunks."""
        hasher = hashlib.sha256()
        with open(path, "rb") as f:
            while chunk := f.read(chunk_size):
                hasher.update(chunk)
        return hasher.hexdigest()

    @staticmethod
    def _quick_hash(path: Path, sample_size: int) -> str:
        """Return a hex SHA-256 digest of the file's head, tail and size.

        The tail offset is clamped to the file size so that a file shorter
        than ``sample_size`` does not trigger an invalid negative seek.
        """
        hasher = hashlib.sha256()

        with open(path, "rb") as f:
            # Take the size from the open handle so that the size and the
            # sampled bytes describe the same file.
            file_size = os.fstat(f.fileno()).st_size

            # Hash beginning
            hasher.update(f.read(sample_size))

            # Hash end
            tail_length = min(sample_size, file_size)
            f.seek(-tail_length, os.SEEK_END)
            hasher.update(f.read(sample_size))

        # Include size in hash
        hasher.update(str(file_size).encode())

        return hasher.hexdigest()

    def to_string(self) -> str:
        """Render the key as ``namespace:version:file_hash:file_size``."""
        return f"{self.namespace}:{self.version}:{self.file_hash}:{self.file_size}"
181
+
182
+
183
@dataclass(frozen=True)
class DataFrameHashCacheKey(CacheKey):
    """Cache key derived from a Polars frame's schema and its leading rows.

    Attributes:
        schema_hash: Truncated SHA-256 of the sorted schema items
        sample_hash: Truncated SHA-256 of the sampled rows rendered as CSV
        row_count: Number of rows in the hashed sample
        column_count: Number of columns in the schema
    """

    schema_hash: str = ""
    sample_hash: str = ""
    row_count: int = 0
    column_count: int = 0

    @classmethod
    def from_dataframe(
        cls,
        df: Any,  # pl.DataFrame or similar
        *,
        namespace: str = "profile",
        version: str = "1",
        sample_rows: int = 1000,
    ) -> "DataFrameHashCacheKey":
        """Build a cache key from a Polars DataFrame or LazyFrame.

        Args:
            df: Polars DataFrame or LazyFrame
            namespace: Cache namespace
            version: Cache version
            sample_rows: Number of leading rows fed into the sample hash

        Returns:
            DataFrameHashCacheKey instance

        Raises:
            TypeError: If ``df`` is neither a Polars DataFrame nor LazyFrame
        """
        import polars as pl

        if not isinstance(df, (pl.DataFrame, pl.LazyFrame)):
            raise TypeError(f"Expected Polars DataFrame, got {type(df)}")

        is_lazy = isinstance(df, pl.LazyFrame)
        schema = df.collect_schema() if is_lazy else df.schema
        leading = df.head(sample_rows)
        sample_df = leading.collect() if is_lazy else leading

        def _short_digest(payload: bytes) -> str:
            # First 16 hex characters of the SHA-256 digest.
            return hashlib.sha256(payload).hexdigest()[:16]

        # Schema items are sorted, so column order does not affect the hash.
        schema_hash = _short_digest(str(sorted(schema.items())).encode())

        # NOTE(review): goes through pandas to render CSV, so pandas must be
        # importable at runtime; confirm it is a declared dependency.
        sample_hash = _short_digest(sample_df.to_pandas().to_csv().encode())

        sampled_rows = len(sample_df)
        n_columns = len(schema)

        return cls(
            key=f"{schema_hash}:{sample_hash}:{sampled_rows}:{n_columns}",
            namespace=namespace,
            version=version,
            schema_hash=schema_hash,
            sample_hash=sample_hash,
            row_count=sampled_rows,
            column_count=n_columns,
        )
247
+
248
+
249
+ # =============================================================================
250
+ # Cache Entry
251
+ # =============================================================================
252
+
253
+
254
@dataclass
class CacheEntry:
    """Cached profile together with its bookkeeping metadata.

    Attributes:
        profile: The cached table profile
        created_at: When the entry was created
        expires_at: When the entry expires, or None for no expiry
        access_count: Number of times ``touch`` has been called
        last_accessed: Time of the most recent ``touch`` (or creation)
        compressed: Compression flag carried with the entry
        size_bytes: Size figure carried with the entry
        metadata: Free-form extra data
    """

    profile: TableProfile
    created_at: datetime = field(default_factory=datetime.now)
    expires_at: datetime | None = None
    access_count: int = 0
    last_accessed: datetime = field(default_factory=datetime.now)
    compressed: bool = False
    size_bytes: int = 0
    metadata: dict[str, Any] = field(default_factory=dict)

    def is_expired(self) -> bool:
        """Return True when ``expires_at`` is set and lies in the past."""
        if self.expires_at is None:
            return False
        return datetime.now() > self.expires_at

    def touch(self) -> None:
        """Record one access: bump the counter and refresh ``last_accessed``."""
        self.access_count += 1
        self.last_accessed = datetime.now()

    def to_dict(self) -> dict[str, Any]:
        """Serialize the entry to a plain dictionary.

        Every dataclass field is written, including ``compressed`` and
        ``size_bytes``, so that ``from_dict(to_dict())`` restores them.
        """
        serializer = ProfileSerializer()
        return {
            "profile": serializer.serialize(self.profile),
            "created_at": self.created_at.isoformat(),
            "expires_at": self.expires_at.isoformat() if self.expires_at else None,
            "access_count": self.access_count,
            "last_accessed": self.last_accessed.isoformat(),
            "compressed": self.compressed,
            "size_bytes": self.size_bytes,
            "metadata": self.metadata,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "CacheEntry":
        """Rebuild an entry from a dictionary produced by ``to_dict``.

        ``profile`` and ``created_at`` are required. Every other key is
        optional and falls back to the dataclass default, so dictionaries
        written without ``compressed``/``size_bytes`` still load.

        Raises:
            KeyError: If ``profile`` or ``created_at`` is missing
        """
        serializer = ProfileSerializer()
        profile = serializer.deserialize(data["profile"])

        created_at = datetime.fromisoformat(data["created_at"])

        raw_expiry = data.get("expires_at")
        expires_at = datetime.fromisoformat(raw_expiry) if raw_expiry else None

        # A missing or null last_accessed falls back to the creation time.
        raw_accessed = data.get("last_accessed")
        last_accessed = (
            datetime.fromisoformat(raw_accessed) if raw_accessed else created_at
        )

        return cls(
            profile=profile,
            created_at=created_at,
            expires_at=expires_at,
            access_count=data.get("access_count", 0),
            last_accessed=last_accessed,
            compressed=data.get("compressed", False),
            size_bytes=data.get("size_bytes", 0),
            # Copy so the entry does not share a dict with the caller's data.
            metadata=dict(data.get("metadata") or {}),
        )
308
+
309
+
310
+ # =============================================================================
311
+ # Cache Backend Protocol
312
+ # =============================================================================
313
+
314
+
315
class CacheBackend(ABC):
    """Abstract interface that every cache backend implements.

    Subclass this to plug in a custom storage layer (Redis, Memcached, etc.).
    """

    @abstractmethod
    def get(self, key: str) -> CacheEntry | None:
        """Look up an entry.

        Args:
            key: Cache key string

        Returns:
            The stored CacheEntry, or None when there is none
        """

    @abstractmethod
    def set(
        self,
        key: str,
        entry: CacheEntry,
        ttl: timedelta | None = None,
    ) -> None:
        """Store an entry.

        Args:
            key: Cache key string
            entry: Entry to cache
            ttl: Time-to-live for the entry
        """

    @abstractmethod
    def delete(self, key: str) -> bool:
        """Remove an entry.

        Args:
            key: Cache key string

        Returns:
            True if an entry was deleted, False if none was found
        """

    @abstractmethod
    def clear(self) -> int:
        """Remove every entry.

        Returns:
            Number of entries cleared
        """

    @abstractmethod
    def exists(self, key: str) -> bool:
        """Report whether a key is present.

        Args:
            key: Cache key string

        Returns:
            True if the key exists
        """

    def get_stats(self) -> dict[str, Any]:
        """Return backend statistics; the base implementation reports none.

        Returns:
            Dictionary with cache statistics (empty by default)
        """
        return {}
389
+
390
+
391
class MemoryCacheBackend(CacheBackend):
    """Dictionary-backed cache that evicts the least recently accessed entry.

    Every public operation runs under a re-entrant lock.

    Attributes:
        max_size: Maximum number of entries
        max_memory_bytes: Maximum memory usage in bytes (0 = unlimited).
            The value is stored on the instance; no method of this class
            reads it.
    """

    def __init__(
        self,
        *,
        max_size: int = 1000,
        max_memory_bytes: int = 0,
    ):
        self._lock = threading.RLock()
        self._cache: dict[str, CacheEntry] = {}
        self._hits = 0
        self._misses = 0
        self.max_size = max_size
        self.max_memory_bytes = max_memory_bytes

    def get(self, key: str) -> CacheEntry | None:
        """Return the live entry for ``key`` and count a hit, else a miss."""
        with self._lock:
            cached = self._cache.get(key)

            # An expired entry is dropped and then handled as a plain miss.
            if cached is not None and cached.is_expired():
                del self._cache[key]
                cached = None

            if cached is None:
                self._misses += 1
                return None

            cached.touch()
            self._hits += 1
            return cached

    def set(
        self,
        key: str,
        entry: CacheEntry,
        ttl: timedelta | None = None,
    ) -> None:
        """Store ``entry`` and evict one entry if the cache grew past max_size."""
        with self._lock:
            if ttl:
                entry.expires_at = datetime.now() + ttl

            self._cache[key] = entry

            over_capacity = len(self._cache) > self.max_size
            if over_capacity:
                self._evict_lru()

    def delete(self, key: str) -> bool:
        """Remove ``key``; return whether it was present."""
        with self._lock:
            try:
                del self._cache[key]
            except KeyError:
                return False
            return True

    def clear(self) -> int:
        """Drop every entry and return how many were removed."""
        with self._lock:
            removed = len(self._cache)
            self._cache.clear()
            return removed

    def exists(self, key: str) -> bool:
        """Report whether a non-expired entry exists, dropping an expired one."""
        with self._lock:
            cached = self._cache.get(key)
            if cached is None:
                return False

            alive = not cached.is_expired()
            if not alive:
                del self._cache[key]
            return alive

    def get_stats(self) -> dict[str, Any]:
        """Return size, capacity and hit/miss counters."""
        with self._lock:
            lookups = self._hits + self._misses
            ratio = self._hits / lookups if lookups > 0 else 0.0
            return {
                "type": "memory",
                "size": len(self._cache),
                "max_size": self.max_size,
                "hits": self._hits,
                "misses": self._misses,
                "hit_ratio": ratio,
            }

    def _evict_lru(self) -> None:
        """Remove the single entry with the oldest ``last_accessed`` time."""
        if self._cache:
            oldest_key, _ = min(
                self._cache.items(),
                key=lambda item: item[1].last_accessed,
            )
            del self._cache[oldest_key]
492
+
493
+
494
+ class FileCacheBackend(CacheBackend):
495
+ """File-based cache backend with optional compression.
496
+
497
+ Stores cached profiles as JSON files with gzip compression.
498
+
499
+ Attributes:
500
+ cache_dir: Directory for cache files
501
+ compress: Whether to compress cache files
502
+ """
503
+
504
def __init__(
    self,
    cache_dir: str | Path = ".truthound_cache",
    *,
    compress: bool = True,
    max_size_mb: int = 1000,
):
    """Configure the backend and create the cache directory if needed.

    Args:
        cache_dir: Directory for cache files
        compress: Whether cache files are gzip-compressed
        max_size_mb: Size limit for the cache directory, in megabytes
    """
    self._lock = threading.RLock()
    self._hits = self._misses = 0

    self.compress = compress
    self.max_size_mb = max_size_mb
    self.cache_dir = Path(cache_dir)

    # Missing parent directories are created; an existing directory is fine.
    self.cache_dir.mkdir(parents=True, exist_ok=True)
520
+
521
def _get_path(self, key: str) -> Path:
    """Map a cache key to its file inside the cache directory.

    The file name is the SHA-256 hex digest of the key, which keeps names
    at a fixed length no matter how long the key is.
    """
    digest = hashlib.sha256(key.encode()).hexdigest()
    extension = ".json.gz" if self.compress else ".json"
    return self.cache_dir / (digest + extension)
527
+
528
def get(self, key: str) -> CacheEntry | None:
    """Load the entry stored for ``key``, or return None on a miss.

    A missing, unreadable, corrupt, truncated or expired file counts as a
    miss; unreadable and expired files are removed. On a hit the entry's
    access statistics are updated and written back on a best-effort basis.

    Args:
        key: Cache key string

    Returns:
        CacheEntry if found and still valid, None otherwise
    """
    import zlib  # local import: only needed to name zlib.error below

    path = self._get_path(key)

    with self._lock:
        if not path.exists():
            self._misses += 1
            return None

        try:
            if self.compress:
                with gzip.open(path, "rt", encoding="utf-8") as f:
                    data = json.load(f)
            else:
                with open(path, "r", encoding="utf-8") as f:
                    data = json.load(f)

            entry = CacheEntry.from_dict(data)
        except (OSError, EOFError, ValueError, KeyError, TypeError, zlib.error):
            # Covers bad or truncated gzip streams, invalid UTF-8 or JSON,
            # and payloads with missing or malformed fields. The file is
            # unusable, so drop it and report a miss.
            path.unlink(missing_ok=True)
            self._misses += 1
            return None

        if entry.is_expired():
            path.unlink(missing_ok=True)
            self._misses += 1
            return None

        entry.touch()
        self._hits += 1

        # Persisting the new access stats is best-effort: a failed write
        # (e.g. read-only directory) must not discard a valid entry.
        try:
            self._save_entry(path, entry)
        except OSError:
            pass

        return entry
563
+
564
def set(
    self,
    key: str,
    entry: CacheEntry,
    ttl: timedelta | None = None,
) -> None:
    """Write ``entry`` to its cache file, then run the size cleanup.

    Args:
        key: Cache key string
        entry: Entry to cache; its ``expires_at`` is overwritten when a
            truthy ``ttl`` is given
        ttl: Time-to-live for the entry
    """
    if ttl:
        expiry = datetime.now() + ttl
        entry.expires_at = expiry

    target = self._get_path(key)

    with self._lock:
        self._save_entry(target, entry)
        # Trim the cache directory if it is now over its size limit.
        self._maybe_cleanup()
580
+
581
+ def _save_entry(self, path: Path, entry: CacheEntry) -> None:
582
+ """Save entry to file."""
583
+ data = entry.to_dict()
584
+
585
+ if self.compress:
586
+ with gzip.open(path, "wt", encoding="utf-8") as f:
587
+ json.dump(data, f)
588
+ else:
589
+ with open(path, "w", encoding="utf-8") as f:
590
+ json.dump(data, f)
591
+
592
+ def delete(self, key: str) -> bool:
593
+ path = self._get_path(key)
594
+
595
+ with self._lock:
596
+ if path.exists():
597
+ path.unlink()
598
+ return True
599
+ return False
600
+
601
+ def clear(self) -> int:
602
+ with self._lock:
603
+ count = 0
604
+ for path in self.cache_dir.glob("*.json*"):
605
+ path.unlink()
606
+ count += 1
607
+ return count
608
+
609
+ def exists(self, key: str) -> bool:
610
+ path = self._get_path(key)
611
+ return path.exists()
612
+
613
+ def get_stats(self) -> dict[str, Any]:
614
+ with self._lock:
615
+ cache_files = list(self.cache_dir.glob("*.json*"))
616
+ total_size = sum(f.stat().st_size for f in cache_files)
617
+ total = self._hits + self._misses
618
+
619
+ return {
620
+ "type": "file",
621
+ "cache_dir": str(self.cache_dir),
622
+ "file_count": len(cache_files),
623
+ "total_size_mb": total_size / (1024 * 1024),
624
+ "hits": self._hits,
625
+ "misses": self._misses,
626
+ "hit_ratio": self._hits / total if total > 0 else 0.0,
627
+ }
628
+
629
+ def _maybe_cleanup(self) -> None:
630
+ """Clean up cache if over size limit."""
631
+ cache_files = list(self.cache_dir.glob("*.json*"))
632
+ total_size = sum(f.stat().st_size for f in cache_files)
633
+ max_bytes = self.max_size_mb * 1024 * 1024
634
+
635
+ if total_size <= max_bytes:
636
+ return
637
+
638
+ # Sort by modification time, delete oldest
639
+ cache_files.sort(key=lambda f: f.stat().st_mtime)
640
+
641
+ for path in cache_files:
642
+ if total_size <= max_bytes * 0.8: # Clean to 80%
643
+ break
644
+ size = path.stat().st_size
645
+ path.unlink()
646
+ total_size -= size
647
+
648
+
649
class RedisConnectionError(Exception):
    """Signals that connecting to Redis, or a Redis operation, failed."""
653
+
654
+
655
class RedisCacheBackend(CacheBackend):
    """Redis-based cache backend for distributed caching.

    Requires redis package to be installed. Includes proper error
    handling for connection failures and timeouts.

    For production use with automatic fallback, consider using
    `ResilientCacheBackend` from `truthound.profiler.resilience`.

    Example:
        backend = RedisCacheBackend(
            host="localhost",
            port=6379,
            prefix="truthound:cache:",
            connect_timeout=5.0,
            socket_timeout=2.0,
        )

    Attributes:
        host: Redis server hostname
        port: Redis server port
        prefix: Key prefix for namespace isolation
        connection_info: Connection details for diagnostics
    """

    # Number of keys requested per SCAN round-trip and deleted per DEL call.
    _SCAN_BATCH = 500

    def __init__(
        self,
        host: str = "localhost",
        port: int = 6379,
        db: int = 0,
        password: str | None = None,
        prefix: str = "truthound:cache:",
        connect_timeout: float = 5.0,
        socket_timeout: float = 2.0,
        retry_on_timeout: bool = True,
        max_connections: int = 10,
        health_check_interval: int = 30,
        lazy_connect: bool = True,
        **kwargs: Any,
    ):
        """Create the connection pool and client.

        Args:
            host: Redis server hostname
            port: Redis server port
            db: Redis database index
            password: Optional password
            prefix: Prefix prepended to every cache key
            connect_timeout: Passed as ``socket_connect_timeout``
            socket_timeout: Passed as ``socket_timeout``
            retry_on_timeout: Passed through to the connection pool
            max_connections: Passed through to the connection pool
            health_check_interval: Passed through to the connection pool
            lazy_connect: When False, ping the server immediately
            **kwargs: Extra keyword arguments for ``redis.ConnectionPool``

        Raises:
            ImportError: If the ``redis`` package is not installed.
            RedisConnectionError: If the client cannot be constructed, or
                if ``lazy_connect`` is False and the initial ping fails.
        """
        self.host = host
        self.port = port
        self.prefix = prefix
        self._hits = 0
        self._misses = 0
        self._errors = 0
        self._lock = threading.RLock()
        self._connected = False
        self._last_error: str | None = None
        self._last_error_time: datetime | None = None

        try:
            import redis
            from redis.exceptions import RedisError
        except ImportError as e:
            raise ImportError(
                "Redis support requires the 'redis' package. "
                "Install with: pip install redis"
            ) from e
        self._redis_module = redis
        self._RedisError = RedisError

        # Create connection pool with timeout settings. A failure here is
        # always raised, even in lazy mode: swallowing it would leave the
        # instance without ``_pool``/``_client`` and every later call would
        # die with an unrelated AttributeError.
        try:
            self._pool = redis.ConnectionPool(
                host=host,
                port=port,
                db=db,
                password=password,
                decode_responses=False,
                socket_connect_timeout=connect_timeout,
                socket_timeout=socket_timeout,
                retry_on_timeout=retry_on_timeout,
                max_connections=max_connections,
                health_check_interval=health_check_interval,
                **kwargs,
            )
            self._client = redis.Redis(connection_pool=self._pool)
        except Exception as e:
            self._last_error = str(e)
            self._last_error_time = datetime.now()
            raise RedisConnectionError(
                f"Failed to create Redis client for {host}:{port}: {e}"
            ) from e

        # Test connection unless lazy
        if not lazy_connect:
            try:
                self._client.ping()
            except Exception as e:
                self._last_error = str(e)
                self._last_error_time = datetime.now()
                raise RedisConnectionError(
                    f"Failed to connect to Redis at {host}:{port}: {e}"
                ) from e
            self._connected = True

    @property
    def connection_info(self) -> dict[str, Any]:
        """Get connection information."""
        return {
            "host": self.host,
            "port": self.port,
            "prefix": self.prefix,
            "connected": self._connected,
            "last_error": self._last_error,
            "last_error_time": (
                self._last_error_time.isoformat()
                if self._last_error_time else None
            ),
        }

    def _make_key(self, key: str) -> str:
        """Create Redis key with prefix."""
        return f"{self.prefix}{key}"

    def _key_pattern(self) -> str:
        """Return a glob pattern matching every key under ``prefix``.

        Glob metacharacters inside the prefix are backslash-escaped so that
        the prefix is matched literally.
        """
        escaped = "".join(
            f"\\{ch}" if ch in "*?[]\\" else ch for ch in self.prefix
        )
        return f"{escaped}*"

    def _iter_keys(self) -> Any:
        """Iterate over this backend's keys using incremental SCAN."""
        return self._client.scan_iter(
            match=self._key_pattern(),
            count=self._SCAN_BATCH,
        )

    def _handle_error(self, e: Exception, operation: str) -> None:
        """Handle and record errors."""
        with self._lock:
            self._errors += 1
            self._last_error = f"{operation}: {e}"
            self._last_error_time = datetime.now()

            # Check if it's a connection error (matched by class name so
            # that both redis and builtin connection/timeout errors count)
            error_name = type(e).__name__
            if "Connection" in error_name or "Timeout" in error_name:
                self._connected = False

    def _record_miss(self) -> None:
        """Increment the miss counter under the lock."""
        with self._lock:
            self._misses += 1

    def _drop_quietly(self, redis_key: str) -> None:
        """Delete ``redis_key``, ignoring Redis errors (best-effort)."""
        try:
            self._client.delete(redis_key)
        except self._RedisError:
            pass

    def ping(self) -> bool:
        """Check if Redis is reachable.

        Returns:
            True if Redis responds to ping
        """
        try:
            self._client.ping()
            self._connected = True
            return True
        except Exception as e:
            self._handle_error(e, "ping")
            return False

    def get(self, key: str) -> CacheEntry | None:
        """Return the live entry for ``key`` or ``None`` on a miss.

        Raises:
            RedisConnectionError: If the Redis GET itself fails.
        """
        redis_key = self._make_key(key)

        try:
            data = self._client.get(redis_key)
            self._connected = True
        except self._RedisError as e:
            self._handle_error(e, "get")
            raise RedisConnectionError(f"Redis get failed: {e}") from e

        if data is None:
            self._record_miss()
            return None

        try:
            entry_dict = json.loads(data.decode("utf-8"))
            entry = CacheEntry.from_dict(entry_dict)
        except (ValueError, KeyError, TypeError):
            # Corrupted entry - try to delete. ValueError covers both
            # JSONDecodeError and UnicodeDecodeError.
            self._drop_quietly(redis_key)
            self._record_miss()
            return None

        if entry.is_expired():
            # Ignore delete errors for expired entries
            self._drop_quietly(redis_key)
            self._record_miss()
            return None

        entry.touch()
        with self._lock:
            self._hits += 1

        return entry

    def set(
        self,
        key: str,
        entry: CacheEntry,
        ttl: timedelta | None = None,
    ) -> None:
        """Store ``entry`` under ``key``; a truthy ``ttl`` sets its expiry.

        Raises:
            ValueError: If the entry cannot be serialized to JSON.
            RedisConnectionError: If the Redis write fails.
        """
        if ttl:
            entry.expires_at = datetime.now() + ttl

        redis_key = self._make_key(key)

        try:
            data = json.dumps(entry.to_dict()).encode("utf-8")
        except (TypeError, ValueError) as e:
            raise ValueError(f"Failed to serialize cache entry: {e}") from e

        try:
            if ttl:
                self._client.setex(redis_key, ttl, data)
            else:
                self._client.set(redis_key, data)
            self._connected = True
        except self._RedisError as e:
            self._handle_error(e, "set")
            raise RedisConnectionError(f"Redis set failed: {e}") from e

    def delete(self, key: str) -> bool:
        """Delete ``key``; return True if Redis reports a removed key.

        Raises:
            RedisConnectionError: If the Redis call fails.
        """
        redis_key = self._make_key(key)
        try:
            result = self._client.delete(redis_key) > 0
            self._connected = True
            return result
        except self._RedisError as e:
            self._handle_error(e, "delete")
            raise RedisConnectionError(f"Redis delete failed: {e}") from e

    def clear(self) -> int:
        """Delete every key under ``prefix`` and return the number removed.

        Keys are discovered with SCAN and deleted in batches instead of
        one KEYS call, so a large keyspace does not block the server.

        Raises:
            RedisConnectionError: If a Redis call fails.
        """
        removed = 0
        batch: list[Any] = []
        try:
            for redis_key in self._iter_keys():
                batch.append(redis_key)
                if len(batch) >= self._SCAN_BATCH:
                    removed += self._client.delete(*batch)
                    batch.clear()
            if batch:
                removed += self._client.delete(*batch)
            # The scan succeeded, so the connection is healthy even when
            # there was nothing to delete.
            self._connected = True
            return removed
        except self._RedisError as e:
            self._handle_error(e, "clear")
            raise RedisConnectionError(f"Redis clear failed: {e}") from e

    def exists(self, key: str) -> bool:
        """Return True if ``key`` is present in Redis.

        Raises:
            RedisConnectionError: If the Redis call fails.
        """
        redis_key = self._make_key(key)
        try:
            result = self._client.exists(redis_key) > 0
            self._connected = True
            return result
        except self._RedisError as e:
            self._handle_error(e, "exists")
            raise RedisConnectionError(f"Redis exists failed: {e}") from e

    def get_stats(self) -> dict[str, Any]:
        """Return counters, connection state and the current key count.

        ``key_count`` is -1 when the keys cannot be counted.
        """
        with self._lock:
            total = self._hits + self._misses

            stats: dict[str, Any] = {
                "type": "redis",
                "host": self.host,
                "port": self.port,
                "prefix": self.prefix,
                "connected": self._connected,
                "hits": self._hits,
                "misses": self._misses,
                "errors": self._errors,
                "hit_ratio": self._hits / total if total > 0 else 0.0,
            }
            last_error = self._last_error
            last_error_time = self._last_error_time

        # Try to get key count (outside the lock: this is a network call)
        try:
            stats["key_count"] = sum(1 for _ in self._iter_keys())
        except self._RedisError:
            stats["key_count"] = -1

        if last_error:
            stats["last_error"] = last_error
            stats["last_error_time"] = (
                last_error_time.isoformat() if last_error_time else None
            )

        return stats

    def close(self) -> None:
        """Close the connection pool."""
        try:
            self._pool.disconnect()
        except Exception:
            # Deliberately best-effort: closing must never raise.
            pass
        finally:
            # Whatever happened, this backend no longer counts as connected.
            self._connected = False
935
+
936
+
937
+ # =============================================================================
938
+ # Cache Backend Registry
939
+ # =============================================================================
940
+
941
+
942
class CacheBackendRegistry:
    """Name-to-class lookup table for cache backends.

    Custom backend types can be added under a name of the caller's choice
    and instantiated later by that name.

    Example:
        registry = CacheBackendRegistry()
        registry.register("custom", CustomBackend)
        backend = registry.create("custom", **kwargs)
    """

    def __init__(self) -> None:
        self._backends: dict[str, type[CacheBackend]] = {}

    def register(
        self,
        name: str,
        backend_class: type[CacheBackend],
    ) -> None:
        """Bind ``name`` to ``backend_class``, replacing any earlier binding."""
        self._backends[name] = backend_class

    def create(self, name: str, **kwargs: Any) -> CacheBackend:
        """Instantiate the backend registered as ``name`` with ``kwargs``.

        Raises:
            KeyError: If no backend is registered under ``name``.
        """
        if name in self._backends:
            backend_class = self._backends[name]
            return backend_class(**kwargs)

        available = list(self._backends)
        raise KeyError(
            f"Unknown cache backend: {name}. "
            f"Available: {available}"
        )

    def list_backends(self) -> list[str]:
        """Return the registered names in registration order."""
        return list(self._backends)
976
+
977
+
978
# Module-wide registry, pre-populated with the built-in backends
cache_backend_registry = CacheBackendRegistry()
for _backend_name, _backend_class in (
    ("memory", MemoryCacheBackend),
    ("file", FileCacheBackend),
    ("redis", RedisCacheBackend),
):
    cache_backend_registry.register(_backend_name, _backend_class)
# Do not leave the loop variables behind as module attributes.
del _backend_name, _backend_class
983
+
984
+
985
+ # =============================================================================
986
+ # Profile Cache
987
+ # =============================================================================
988
+
989
+
990
@dataclass
class CacheConfig:
    """Settings bundle for profile caching.

    NOTE(review): the field names mirror the ``ProfileCache`` constructor
    arguments, but nothing in this part of the module consumes this class;
    confirm how (and whether) ``compression`` is applied.
    """

    # Backend name; defaults to the in-memory backend.
    backend: str = "memory"
    # Keyword options for the backend; each instance gets its own dict.
    backend_options: dict[str, Any] = field(default_factory=dict)
    # Default time-to-live; None leaves it unset.
    default_ttl: timedelta | None = None
    # Master switch for caching.
    enabled: bool = True
    # Compression flag (see the review note above).
    compression: bool = True
999
+
1000
+
1001
class ProfileCache:
    """High-level profile caching with cache-through pattern.

    This is the main interface for caching profile results.
    It wraps a cache backend and provides convenience methods.

    Example:
        # Create cache with default memory backend
        cache = ProfileCache()

        # Or with file backend
        cache = ProfileCache(
            backend="file",
            backend_options={"cache_dir": ".cache"}
        )

        # Cache-through pattern
        key = FileHashCacheKey.from_file("data.parquet")
        profile = cache.get_or_compute(
            key,
            compute_fn=lambda: profile_file("data.parquet")
        )
    """

    def __init__(
        self,
        backend: str | CacheBackend = "memory",
        backend_options: dict[str, Any] | None = None,
        default_ttl: timedelta | None = None,
        enabled: bool = True,
    ):
        """Initialize profile cache.

        Args:
            backend: Backend name or instance
            backend_options: Options for backend construction (ignored
                when ``backend`` is already an instance)
            default_ttl: Default time-to-live for entries
            enabled: Whether caching is enabled
        """
        self.enabled = enabled
        self.default_ttl = default_ttl

        if isinstance(backend, CacheBackend):
            self._backend = backend
        else:
            options = backend_options or {}
            self._backend = cache_backend_registry.create(backend, **options)

    @property
    def backend(self) -> CacheBackend:
        """Access the underlying backend."""
        return self._backend

    def get(self, key: CacheKeyProtocol) -> TableProfile | None:
        """Get profile from cache.

        Args:
            key: Cache key

        Returns:
            Cached profile or None (always None when caching is disabled)
        """
        if not self.enabled:
            return None

        entry = self._backend.get(key.to_string())
        return entry.profile if entry else None

    def set(
        self,
        key: CacheKeyProtocol,
        profile: TableProfile,
        ttl: timedelta | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> None:
        """Store profile in cache.

        Does nothing when caching is disabled.

        Args:
            key: Cache key
            profile: Profile to cache
            ttl: Time-to-live (uses default if not specified)
            metadata: Additional metadata to store
        """
        if not self.enabled:
            return

        entry = CacheEntry(
            profile=profile,
            metadata=metadata or {},
        )

        self._backend.set(
            key.to_string(),
            entry,
            ttl=ttl or self.default_ttl,
        )

    def get_or_compute(
        self,
        key: CacheKeyProtocol,
        compute_fn: Callable[[], TableProfile],
        ttl: timedelta | None = None,
        metadata: dict[str, Any] | None = None,
    ) -> TableProfile:
        """Get from cache or compute and cache.

        This implements the cache-through pattern:
        1. Try to get from cache
        2. If miss, compute the profile
        3. Store in cache
        4. Return the profile

        Args:
            key: Cache key
            compute_fn: Function to compute profile on cache miss
            ttl: Time-to-live for cached entry
            metadata: Additional metadata to store

        Returns:
            Cached or computed profile
        """
        # Try cache first
        cached = self.get(key)
        if cached is not None:
            return cached

        # Compute profile
        profile = compute_fn()

        # Store in cache
        self.set(key, profile, ttl=ttl, metadata=metadata)

        return profile

    def invalidate(self, key: CacheKeyProtocol) -> bool:
        """Invalidate a cache entry.

        Args:
            key: Cache key

        Returns:
            True if entry was invalidated
        """
        return self._backend.delete(key.to_string())

    def invalidate_by_pattern(self, pattern: str) -> int:
        """Invalidate entries matching a pattern.

        Note: Not implemented yet. Pattern matching needs backend support,
        so this placeholder removes nothing and always returns 0.

        Args:
            pattern: Pattern to match (glob-style); currently unused

        Returns:
            Number of entries invalidated (currently always 0)
        """
        return 0

    def clear(self) -> int:
        """Clear all cache entries.

        Returns:
            Number of entries cleared
        """
        return self._backend.clear()

    def get_stats(self) -> dict[str, Any]:
        """Get cache statistics.

        Returns:
            Backend statistics plus ``enabled`` and ``default_ttl_seconds``
        """
        stats = self._backend.get_stats()
        stats["enabled"] = self.enabled
        stats["default_ttl_seconds"] = (
            self.default_ttl.total_seconds() if self.default_ttl else None
        )
        return stats

    def _warm_key(
        self,
        key: CacheKeyProtocol,
        key_str: str,
        compute_fn: Callable[[CacheKeyProtocol], TableProfile],
    ) -> bool:
        """Ensure one key is cached; return False if anything went wrong."""
        try:
            if not self._backend.exists(key_str):
                self.set(key, compute_fn(key))
            return True
        except Exception:
            # Warming is best-effort: a failure for one key is reported in
            # the result instead of aborting the remaining keys.
            return False

    def warm(
        self,
        keys: list[CacheKeyProtocol],
        compute_fn: Callable[[CacheKeyProtocol], TableProfile],
        *,
        parallel: bool = False,
    ) -> dict[str, bool]:
        """Warm cache with multiple entries.

        Keys already present in the backend are not recomputed.

        Args:
            keys: Cache keys to warm
            compute_fn: Function to compute each profile
            parallel: Whether to compute in parallel. When True the keys
                are processed on a thread pool, so ``compute_fn`` must be
                safe to call from several threads at once.

        Returns:
            Dictionary mapping key strings to success status
        """
        key_strings = [key.to_string() for key in keys]

        if parallel and len(keys) > 1:
            # Local import: only needed on the opt-in parallel path.
            from concurrent.futures import ThreadPoolExecutor

            with ThreadPoolExecutor() as pool:
                outcomes = list(
                    pool.map(
                        lambda pair: self._warm_key(pair[0], pair[1], compute_fn),
                        zip(keys, key_strings),
                    )
                )
        else:
            outcomes = [
                self._warm_key(key, key_str, compute_fn)
                for key, key_str in zip(keys, key_strings)
            ]

        return dict(zip(key_strings, outcomes))
1212
+
1213
+
1214
+ # =============================================================================
1215
+ # Caching Decorator
1216
+ # =============================================================================
1217
+
1218
+
1219
def cached_profile(
    cache: ProfileCache | None = None,
    ttl: timedelta | None = None,
    key_fn: Callable[..., CacheKeyProtocol] | None = None,
) -> Callable:
    """Decorator to cache profile function results.

    Example:
        cache = ProfileCache()

        @cached_profile(cache, ttl=timedelta(hours=1))
        def profile_file(path: str) -> TableProfile:
            # expensive profiling...
            return profile

    Args:
        cache: ProfileCache instance (creates default if not provided)
        ttl: Time-to-live for cached entries
        key_fn: Function to generate cache key from arguments. When
            omitted, a ``str``/``Path`` first positional argument is
            treated as a file path and hashed via ``FileHashCacheKey``;
            otherwise the key is a SHA-256 of the function name and the
            textual form of the arguments.

    Returns:
        Decorator that wraps a profiling function with cache look-up
    """
    # Local import so the decorator does not depend on the module's
    # top-level import list.
    import functools

    _cache = cache if cache is not None else ProfileCache()

    def decorator(func: Callable[..., TableProfile]) -> Callable[..., TableProfile]:
        # Preserve the wrapped function's name, docstring and signature
        # metadata for introspection and debugging.
        @functools.wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> TableProfile:
            # Generate cache key
            if key_fn:
                key = key_fn(*args, **kwargs)
            elif args and isinstance(args[0], (str, Path)):
                # Default: use first argument as file path
                key = FileHashCacheKey.from_file(args[0])
            else:
                # Fallback to function call hash
                call_hash = hashlib.sha256(
                    f"{func.__name__}:{args}:{kwargs}".encode()
                ).hexdigest()
                key = CacheKey(key=call_hash)

            return _cache.get_or_compute(
                key,
                compute_fn=lambda: func(*args, **kwargs),
                ttl=ttl,
            )

        return wrapper

    return decorator
1269
+
1270
+
1271
+ # =============================================================================
1272
+ # Convenience Functions
1273
+ # =============================================================================
1274
+
1275
+
1276
def create_cache(
    backend: str = "memory",
    **kwargs: Any,
) -> ProfileCache:
    """Build a ProfileCache on top of the named backend.

    Args:
        backend: Backend type ("memory", "file", "redis")
        **kwargs: Backend-specific options, forwarded as ``backend_options``

    Returns:
        Configured ProfileCache instance
    """
    backend_options = kwargs
    return ProfileCache(backend=backend, backend_options=backend_options)
1290
+
1291
+
1292
def hash_file(path: str | Path, quick: bool = False) -> str:
    """Return the content hash of a file.

    Args:
        path: Path to file
        quick: Forwarded as ``quick_hash`` (quick hash for large files)

    Returns:
        The ``file_hash`` of the ``FileHashCacheKey`` built for ``path``
        (a SHA-256 hash string)
    """
    return FileHashCacheKey.from_file(path, quick_hash=quick).file_hash