truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877) hide show
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,733 @@
1
+ """Reference data caching for drift and anomaly validators.
2
+
3
+ This module provides memory-efficient caching mechanisms for reference data
4
+ used in drift detection and anomaly detection validators.
5
+
6
+ Key Features:
7
+ - LRU cache with configurable size limits
8
+ - Statistics summary storage (no raw data retention)
9
+ - Thread-safe operations
10
+ - Automatic cache invalidation
11
+ - Memory tracking and limits
12
+
13
+ Usage:
14
+ from truthound.validators.cache import (
15
+ ReferenceCache,
16
+ ReferenceStatistics,
17
+ CacheConfig,
18
+ )
19
+
20
+ # Configure cache
21
+ config = CacheConfig(max_entries=100, max_memory_mb=512)
22
+ cache = ReferenceCache(config)
23
+
24
+ # Store reference statistics (not raw data)
25
+ stats = ReferenceStatistics.from_lazyframe(lf, column="price")
26
+ cache.put("model_v1:price", stats)
27
+
28
+ # Retrieve cached statistics
29
+ cached_stats = cache.get("model_v1:price")
30
+ """
31
+
32
+ from dataclasses import dataclass, field
33
+ from typing import Any
34
+ from collections import OrderedDict
35
+ from threading import RLock
36
+ from functools import lru_cache
37
+ import hashlib
38
+ import time
39
+ import sys
40
+
41
+ import polars as pl
42
+ import numpy as np
43
+
44
+
45
+ # ============================================================================
46
+ # Configuration
47
+ # ============================================================================
48
+
49
+ @dataclass(frozen=True)
50
+ class CacheConfig:
51
+ """Configuration for reference data cache.
52
+
53
+ Attributes:
54
+ max_entries: Maximum number of cache entries
55
+ max_memory_mb: Maximum memory usage in MB (approximate)
56
+ ttl_seconds: Time-to-live for cache entries (None = no expiration)
57
+ enable_statistics_summary: Store statistics summary instead of raw data
58
+ n_histogram_bins: Number of bins for histogram caching
59
+ quantiles: Quantiles to cache (for drift detection)
60
+ """
61
+ max_entries: int = 100
62
+ max_memory_mb: float = 512.0
63
+ ttl_seconds: float | None = 3600.0 # 1 hour default
64
+ enable_statistics_summary: bool = True
65
+ n_histogram_bins: int = 50
66
+ quantiles: tuple[float, ...] = (0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99)
67
+
68
+
69
+ # ============================================================================
70
+ # Statistics Summary Classes
71
+ # ============================================================================
72
+
73
@dataclass
class NumericStatistics:
    """Cached statistics for a numeric column.

    A compact statistical summary that is stored in place of the raw
    reference data.

    Attributes:
        count: Number of non-null values that were summarised.
        null_count: Number of null values in the source series.
        mean: Arithmetic mean of the non-null values.
        std: Standard deviation (NumPy default, ``ddof=0``).
        variance: Variance (NumPy default, ``ddof=0``).
        min_value: Smallest non-null value.
        max_value: Largest non-null value.
        sum_value: Sum of the non-null values.
        quantiles: Mapping ``{level: value}``, e.g. ``{0.5: 50.0}``.
        histogram_edges: Bin edges of the cached histogram.
        histogram_counts: Normalised frequency of each bin.
        created_at: ``time.time()`` timestamp taken at construction.
        source_hash: Short fingerprint of the source data ("" if not computed).
    """

    # Basic statistics
    count: int
    null_count: int
    mean: float
    std: float
    variance: float
    min_value: float
    max_value: float
    sum_value: float

    # Quantiles (configurable)
    quantiles: dict[float, float]  # {0.5: 50.0, 0.95: 95.0, ...}

    # Histogram for PSI/distribution comparison
    histogram_edges: list[float]  # Bin edges
    histogram_counts: list[float]  # Normalized frequencies

    # Metadata
    created_at: float = field(default_factory=time.time)
    source_hash: str = ""  # Hash of source data for validation

    def __post_init__(self) -> None:
        """Validate statistics.

        Raises:
            ValueError: If ``count`` is negative.
        """
        if self.count < 0:
            raise ValueError("count must be non-negative")

    def estimate_memory_bytes(self) -> int:
        """Estimate memory usage of this statistics object.

        Returns:
            Approximate size in bytes. ``sys.getsizeof`` is shallow, so the
            contents of the quantile dict and histogram lists are added
            explicitly (histogram elements are approximated at 8 bytes each).
        """
        # Base dataclass instance
        base_size = sys.getsizeof(self)
        # Quantiles dict plus its keys and values
        quantiles_size = sys.getsizeof(self.quantiles) + sum(
            sys.getsizeof(k) + sys.getsizeof(v)
            for k, v in self.quantiles.items()
        )
        # Histogram lists, counting each element as a float64
        hist_size = (
            sys.getsizeof(self.histogram_edges)
            + len(self.histogram_edges) * 8
            + sys.getsizeof(self.histogram_counts)
            + len(self.histogram_counts) * 8
        )
        return base_size + quantiles_size + hist_size

    @classmethod
    def from_series(
        cls,
        series: "pl.Series",
        n_bins: int = 50,
        quantiles: tuple[float, ...] = (0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99),
    ) -> "NumericStatistics":
        """Create statistics summary from a Polars Series.

        Args:
            series: Numeric Polars Series
            n_bins: Number of histogram bins
            quantiles: Quantiles to compute

        Returns:
            NumericStatistics instance. When the series has no non-null
            values, all statistics are zero and the quantile/histogram
            containers are empty. When fewer than two distinct bin edges can
            be derived (e.g. a constant column), ``histogram_edges`` holds the
            single value and ``histogram_counts`` is empty.
        """
        # Drop nulls for calculations
        non_null = series.drop_nulls()
        arr = non_null.to_numpy()

        if len(arr) == 0:
            return cls(
                count=0,
                null_count=len(series),
                mean=0.0,
                std=0.0,
                variance=0.0,
                min_value=0.0,
                max_value=0.0,
                sum_value=0.0,
                quantiles={},
                histogram_edges=[],
                histogram_counts=[],
            )

        # Basic statistics
        count = len(non_null)
        null_count = len(series) - count
        mean_val = float(np.mean(arr))
        std_val = float(np.std(arr))
        var_val = float(np.var(arr))
        min_val = float(np.min(arr))
        max_val = float(np.max(arr))
        sum_val = float(np.sum(arr))

        # Quantiles
        quantile_values = {
            q: float(np.percentile(arr, q * 100)) for q in quantiles
        }

        # Histogram (quantile-based bins for robustness); duplicate edges are
        # collapsed so the bins passed to np.histogram are strictly increasing.
        edges = np.unique(np.percentile(arr, np.linspace(0, 100, n_bins + 1)))

        if len(edges) >= 2:
            counts, _ = np.histogram(arr, bins=edges)
            total = counts.sum()
            frequencies = (
                (counts / total).tolist() if total > 0 else [0.0] * (len(edges) - 1)
            )
            histogram_edges = edges.tolist()
        else:
            # Fewer than two distinct edges. The fallback is built as plain
            # Python lists and stored as-is: calling ``.tolist()`` on it (as
            # was done unconditionally before) raises AttributeError.
            histogram_edges = [min_val, max_val] if min_val != max_val else [min_val]
            frequencies = [1.0] if len(histogram_edges) == 2 else []

        # Short fingerprint for validation (not a security hash)
        source_hash = hashlib.md5(
            f"{count}:{mean_val:.6f}:{std_val:.6f}".encode()
        ).hexdigest()[:16]

        return cls(
            count=count,
            null_count=null_count,
            mean=mean_val,
            std=std_val,
            variance=var_val,
            min_value=min_val,
            max_value=max_val,
            sum_value=sum_val,
            quantiles=quantile_values,
            histogram_edges=histogram_edges,
            histogram_counts=frequencies,
            source_hash=source_hash,
        )

    @classmethod
    def from_lazyframe(
        cls,
        lf: "pl.LazyFrame",
        column: str,
        n_bins: int = 50,
        quantiles: tuple[float, ...] = (0.01, 0.05, 0.1, 0.25, 0.5, 0.75, 0.9, 0.95, 0.99),
    ) -> "NumericStatistics":
        """Create statistics summary from a LazyFrame column.

        The selected column is collected into memory and then summarised by
        :meth:`from_series`.

        Args:
            lf: Polars LazyFrame
            column: Column name
            n_bins: Number of histogram bins
            quantiles: Quantiles to compute

        Returns:
            NumericStatistics instance
        """
        series = lf.select(pl.col(column)).collect().to_series()
        return cls.from_series(series, n_bins, quantiles)
230
+
231
+
232
@dataclass
class CategoricalStatistics:
    """Frequency summary of a categorical column, suitable for caching."""

    # Relative frequency of every category, keyed by str(category)
    frequencies: dict[str, float]

    # Basic counts
    count: int
    null_count: int
    unique_count: int

    # Most frequent categories as (category, frequency), most frequent first
    top_categories: list[tuple[str, float]]

    # Metadata
    created_at: float = field(default_factory=time.time)
    source_hash: str = ""

    def estimate_memory_bytes(self) -> int:
        """Return an approximate size of this object in bytes."""
        total_bytes = sys.getsizeof(self)

        # Frequency table: the dict itself plus each key and value
        total_bytes += sys.getsizeof(self.frequencies)
        for name, freq in self.frequencies.items():
            total_bytes += sys.getsizeof(name) + sys.getsizeof(freq)

        # Top-category list: the list itself plus each tuple (shallow)
        total_bytes += sys.getsizeof(self.top_categories)
        for pair in self.top_categories:
            total_bytes += sys.getsizeof(pair)

        return total_bytes

    @classmethod
    def from_series(
        cls,
        series: pl.Series,
        top_n: int = 100,
    ) -> "CategoricalStatistics":
        """Summarise a Polars Series of categories.

        Args:
            series: Categorical Polars Series
            top_n: How many of the most frequent categories to keep in
                ``top_categories``

        Returns:
            CategoricalStatistics instance; an all-null or empty series gives
            empty containers and zero counts (``null_count`` is the series
            length).
        """
        present = series.drop_nulls()
        n_present = len(present)
        n_total = len(series)

        if n_present == 0:
            return cls(
                frequencies={},
                count=0,
                null_count=n_total,
                unique_count=0,
                top_categories=[],
            )

        # Relative frequency per category; categories are keyed by their
        # string form
        frequencies = {
            str(category): occurrences / n_present
            for category, occurrences in present.value_counts().iter_rows()
        }

        # Highest frequency first, truncated to the requested number
        ranked = sorted(
            frequencies.items(),
            key=lambda item: item[1],
            reverse=True,
        )

        # Short fingerprint built from the value and category counts
        fingerprint = f"{n_present}:{len(frequencies)}".encode()
        source_hash = hashlib.md5(fingerprint).hexdigest()[:16]

        return cls(
            frequencies=frequencies,
            count=n_present,
            null_count=n_total - n_present,
            unique_count=len(frequencies),
            top_categories=ranked[:top_n],
            source_hash=source_hash,
        )

    @classmethod
    def from_lazyframe(
        cls,
        lf: pl.LazyFrame,
        column: str,
        top_n: int = 100,
    ) -> "CategoricalStatistics":
        """Collect one LazyFrame column and summarise it via ``from_series``."""
        collected = lf.select(pl.col(column)).collect()
        return cls.from_series(collected.to_series(), top_n)
326
+
327
+
328
@dataclass
class MultiColumnStatistics:
    """Cached statistics for multiple columns (for multivariate analysis).

    Attributes:
        column_stats: Per-column ``NumericStatistics``, keyed by column name.
        correlation_matrix: Pairwise correlations keyed by ``(col_a, col_b)``;
            only the upper triangle (including the diagonal) is stored.
        covariance_matrix: Pairwise covariances with the same key layout.
        medians: Median of each column.
        iqrs: Inter-quartile range of each column (1.0 when it would be zero).
        columns: Column names the summary was built for.
        created_at: ``time.time()`` timestamp taken at construction.
    """

    # Per-column statistics
    column_stats: dict[str, NumericStatistics]

    # Correlation matrix (upper triangle only to save space)
    correlation_matrix: dict[tuple[str, str], float]

    # Covariance matrix summary
    covariance_matrix: dict[tuple[str, str], float]

    # Column medians and IQRs for normalization
    medians: dict[str, float]
    iqrs: dict[str, float]

    # Metadata
    columns: list[str]
    created_at: float = field(default_factory=time.time)

    def estimate_memory_bytes(self) -> int:
        """Estimate memory usage.

        Returns:
            Approximate size in bytes, covering the per-column statistics,
            both pairwise matrices, the median/IQR tables and the column
            list. ``sys.getsizeof`` is shallow, so container contents are
            added explicitly.
        """
        base_size = sys.getsizeof(self)
        stats_size = sum(
            s.estimate_memory_bytes() for s in self.column_stats.values()
        )
        # The covariance matrix has the same layout as the correlation matrix
        # and must be counted as well; 24 bytes approximates one entry.
        matrix_size = sum(
            sys.getsizeof(matrix) + len(matrix) * 24
            for matrix in (self.correlation_matrix, self.covariance_matrix)
        )
        # Median / IQR lookup tables: dict plus each key and value
        table_size = sum(
            sys.getsizeof(table)
            + sum(sys.getsizeof(k) + sys.getsizeof(v) for k, v in table.items())
            for table in (self.medians, self.iqrs)
        )
        columns_size = sys.getsizeof(self.columns) + sum(
            sys.getsizeof(name) for name in self.columns
        )
        return base_size + stats_size + matrix_size + table_size + columns_size

    @classmethod
    def from_lazyframe(
        cls,
        lf: pl.LazyFrame,
        columns: list[str],
        n_bins: int = 50,
    ) -> "MultiColumnStatistics":
        """Create multi-column statistics from LazyFrame.

        The selected columns are collected after ``drop_nulls()``, so every
        statistic (including the per-column ones) is computed on the rows
        that remain.

        Args:
            lf: Polars LazyFrame
            columns: List of numeric column names
            n_bins: Number of histogram bins per column

        Returns:
            MultiColumnStatistics instance; if no rows remain after dropping
            nulls, all statistic containers are empty.
        """
        # Collect data for selected columns
        df = lf.select([pl.col(c) for c in columns]).drop_nulls().collect()

        if len(df) == 0:
            return cls(
                column_stats={},
                correlation_matrix={},
                covariance_matrix={},
                medians={},
                iqrs={},
                columns=columns,
            )

        # Per-column statistics
        column_stats = {
            col: NumericStatistics.from_series(df[col], n_bins) for col in columns
        }

        # Compute correlation and covariance matrices
        data = df.to_numpy()
        correlation_matrix = {}
        covariance_matrix = {}

        for i, col1 in enumerate(columns):
            for j, col2 in enumerate(columns):
                if i <= j:  # Upper triangle only
                    corr = float(np.corrcoef(data[:, i], data[:, j])[0, 1])
                    cov = float(np.cov(data[:, i], data[:, j])[0, 1])
                    correlation_matrix[(col1, col2)] = corr
                    covariance_matrix[(col1, col2)] = cov

        # Medians and IQRs for normalization
        medians = {}
        iqrs = {}
        for i, col in enumerate(columns):
            medians[col] = float(np.median(data[:, i]))
            q1, q3 = np.percentile(data[:, i], [25, 75])
            # A zero IQR is replaced by 1.0 so it can be used as a divisor
            iqrs[col] = float(q3 - q1) if q3 != q1 else 1.0

        return cls(
            column_stats=column_stats,
            correlation_matrix=correlation_matrix,
            covariance_matrix=covariance_matrix,
            medians=medians,
            iqrs=iqrs,
            columns=columns,
        )
423
+
424
+
425
+ # ============================================================================
426
+ # Cache Entry
427
+ # ============================================================================
428
+
429
@dataclass
class CacheEntry:
    """One cached statistics object together with its bookkeeping fields."""

    key: str
    value: NumericStatistics | CategoricalStatistics | MultiColumnStatistics
    created_at: float = field(default_factory=time.time)
    last_accessed: float = field(default_factory=time.time)
    access_count: int = 0

    def is_expired(self, ttl_seconds: float | None) -> bool:
        """Tell whether the entry is older than ``ttl_seconds``.

        A ``ttl_seconds`` of ``None`` means the entry never expires.
        """
        if ttl_seconds is None:
            return False
        age = time.time() - self.created_at
        return age > ttl_seconds

    def touch(self) -> None:
        """Record one access: bump the counter and refresh the access time."""
        self.access_count += 1
        self.last_accessed = time.time()

    def estimate_memory_bytes(self) -> int:
        """Return the approximate size of the entry, key and value in bytes."""
        overhead = sys.getsizeof(self) + sys.getsizeof(self.key)
        return overhead + self.value.estimate_memory_bytes()
455
+
456
+
457
+ # ============================================================================
458
+ # LRU Cache Implementation
459
+ # ============================================================================
460
+
461
+ class ReferenceCache:
462
+ """Thread-safe LRU cache for reference data statistics.
463
+
464
+ This cache stores statistical summaries of reference data instead of
465
+ raw data, dramatically reducing memory usage while preserving the
466
+ information needed for drift and anomaly detection.
467
+
468
+ Example:
469
+ cache = ReferenceCache(CacheConfig(max_entries=100))
470
+
471
+ # Cache numeric statistics
472
+ stats = NumericStatistics.from_lazyframe(lf, "price")
473
+ cache.put("model_v1:price", stats)
474
+
475
+ # Retrieve later
476
+ cached = cache.get("model_v1:price")
477
+ if cached:
478
+ print(f"Mean: {cached.mean}, Std: {cached.std}")
479
+ """
480
+
481
def __init__(self, config: CacheConfig | None = None):
    """Set up an empty cache.

    Args:
        config: Cache configuration; a default ``CacheConfig`` is used when
            this is ``None`` (or otherwise falsy).
    """
    # Synchronisation primitive guarding all state below
    self._lock = RLock()

    # Entries in insertion/usage order
    self._cache: OrderedDict[str, CacheEntry] = OrderedDict()
    self.config = config or CacheConfig()

    # Running counters: memory estimate, then hit/miss statistics
    self._total_memory_bytes: int = 0
    self._hits: int = 0
    self._misses: int = 0
495
+
496
def get(self, key: str) -> NumericStatistics | CategoricalStatistics | MultiColumnStatistics | None:
    """Look up cached statistics.

    Args:
        key: Cache key

    Returns:
        The cached statistics, or None when the key is absent or its entry
        has expired (an expired entry is removed).
    """
    with self._lock:
        try:
            entry = self._cache[key]
        except KeyError:
            self._misses += 1
            return None

        # An expired entry is dropped and reported as a miss
        if entry.is_expired(self.config.ttl_seconds):
            self._remove_entry(key)
            self._misses += 1
            return None

        # Hit: mark the key as most recently used and record the access
        self._cache.move_to_end(key)
        entry.touch()
        self._hits += 1

        return entry.value
524
+
525
+ def put(
526
+ self,
527
+ key: str,
528
+ value: NumericStatistics | CategoricalStatistics | MultiColumnStatistics,
529
+ ) -> None:
530
+ """Store statistics in cache.
531
+
532
+ Args:
533
+ key: Cache key
534
+ value: Statistics to cache
535
+ """
536
+ with self._lock:
537
+ # Remove existing entry if present
538
+ if key in self._cache:
539
+ self._remove_entry(key)
540
+
541
+ # Create new entry
542
+ entry = CacheEntry(key=key, value=value)
543
+ entry_size = entry.estimate_memory_bytes()
544
+
545
+ # Evict entries if necessary
546
+ self._evict_if_needed(entry_size)
547
+
548
+ # Add new entry
549
+ self._cache[key] = entry
550
+ self._total_memory_bytes += entry_size
551
+
552
+ def remove(self, key: str) -> bool:
553
+ """Remove entry from cache.
554
+
555
+ Args:
556
+ key: Cache key
557
+
558
+ Returns:
559
+ True if entry was removed, False if not found
560
+ """
561
+ with self._lock:
562
+ if key in self._cache:
563
+ self._remove_entry(key)
564
+ return True
565
+ return False
566
+
567
+ def clear(self) -> None:
568
+ """Clear all cache entries."""
569
+ with self._lock:
570
+ self._cache.clear()
571
+ self._total_memory_bytes = 0
572
+
573
+ def get_stats(self) -> dict[str, Any]:
574
+ """Get cache statistics.
575
+
576
+ Returns:
577
+ Dictionary of cache statistics
578
+ """
579
+ with self._lock:
580
+ total_requests = self._hits + self._misses
581
+ hit_rate = self._hits / total_requests if total_requests > 0 else 0.0
582
+
583
+ return {
584
+ "entries": len(self._cache),
585
+ "max_entries": self.config.max_entries,
586
+ "memory_bytes": self._total_memory_bytes,
587
+ "memory_mb": self._total_memory_bytes / (1024 * 1024),
588
+ "max_memory_mb": self.config.max_memory_mb,
589
+ "hits": self._hits,
590
+ "misses": self._misses,
591
+ "hit_rate": hit_rate,
592
+ }
593
+
594
+ def _remove_entry(self, key: str) -> None:
595
+ """Remove entry and update memory tracking."""
596
+ if key in self._cache:
597
+ entry = self._cache.pop(key)
598
+ self._total_memory_bytes -= entry.estimate_memory_bytes()
599
+ self._total_memory_bytes = max(0, self._total_memory_bytes)
600
+
601
+ def _evict_if_needed(self, new_entry_size: int) -> None:
602
+ """Evict entries if cache limits are exceeded."""
603
+ max_memory_bytes = int(self.config.max_memory_mb * 1024 * 1024)
604
+
605
+ # Evict while over limits
606
+ while self._cache and (
607
+ len(self._cache) >= self.config.max_entries or
608
+ self._total_memory_bytes + new_entry_size > max_memory_bytes
609
+ ):
610
+ # Remove least recently used (first item)
611
+ oldest_key = next(iter(self._cache))
612
+ self._remove_entry(oldest_key)
613
+
614
+ def __contains__(self, key: str) -> bool:
615
+ """Check if key exists in cache (without updating access time)."""
616
+ with self._lock:
617
+ if key not in self._cache:
618
+ return False
619
+ entry = self._cache[key]
620
+ return not entry.is_expired(self.config.ttl_seconds)
621
+
622
+ def __len__(self) -> int:
623
+ """Return number of cache entries."""
624
+ with self._lock:
625
+ return len(self._cache)
626
+
627
+
628
+ # ============================================================================
629
+ # Global Cache Instance
630
+ # ============================================================================
631
+
632
# Module-wide cache state. The lock guards every read and rebind of
# ``_global_cache``, which stays None until get_global_cache() or
# reset_global_cache() creates an instance.
_global_cache_lock = RLock()
_global_cache: ReferenceCache | None = None
635
+
636
+
637
def get_global_cache(config: CacheConfig | None = None) -> ReferenceCache:
    """Return the shared reference cache, creating it on first use.

    Args:
        config: Configuration for the cache. It is consulted only when the
            global instance does not exist yet; later calls ignore it.

    Returns:
        The global ReferenceCache instance
    """
    global _global_cache

    with _global_cache_lock:
        cache = _global_cache
        if cache is None:
            # First caller wins: build the instance and publish it.
            cache = ReferenceCache(config or CacheConfig())
            _global_cache = cache
        return cache
652
+
653
+
654
def clear_global_cache() -> None:
    """Empty the global cache if one has been created.

    The cache instance itself is kept; only its contents are cleared.
    """
    with _global_cache_lock:
        cache = _global_cache
        if cache is None:
            return
        cache.clear()
661
+
662
+
663
def reset_global_cache(config: CacheConfig | None = None) -> ReferenceCache:
    """Replace the global cache with a brand-new instance.

    Args:
        config: Configuration for the new cache (defaults are used if None)

    Returns:
        The newly created global ReferenceCache instance
    """
    global _global_cache

    with _global_cache_lock:
        replacement = ReferenceCache(config or CacheConfig())
        _global_cache = replacement
        return replacement
677
+
678
+
679
+ # ============================================================================
680
+ # Cache Key Utilities
681
+ # ============================================================================
682
+
683
def make_cache_key(
    validator_name: str,
    column: str | list[str] | tuple[str, ...],
    version: str = "v1",
    extra: str = "",
) -> str:
    """Create a standardized cache key.

    The key has the form ``validator|columns|version[|extra]``. Several
    column names are sorted and joined with ``:`` so the key does not depend
    on the order in which they were listed. Components are not escaped, so
    names that themselves contain ``:`` or ``|`` can produce colliding keys.

    Args:
        validator_name: Name of the validator
        column: A single column name, or any non-string iterable of column
            names (list, tuple, set, ...)
        version: Version string for cache invalidation
        extra: Extra key component; omitted from the key when empty

    Returns:
        Cache key string

    Raises:
        TypeError: If a component is not a string.
    """
    if isinstance(column, str):
        col_str = column
    else:
        # Any iterable of names is accepted, not just list: previously a
        # tuple fell through unchanged and broke the final join.
        col_str = ":".join(sorted(column))

    parts = [validator_name, col_str, version]
    if extra:
        parts.append(extra)

    return "|".join(parts)
710
+
711
+
712
def hash_dataframe(lf: pl.LazyFrame, sample_size: int = 1000) -> str:
    """Create a hash of a LazyFrame for cache key generation.

    Only the schema and the first ``sample_size`` rows are hashed, which
    avoids materializing the full frame. Frames that differ only beyond
    that prefix therefore produce the same hash.

    Args:
        lf: LazyFrame to hash
        sample_size: Number of leading rows to include in the hash

    Returns:
        First 16 hex characters of the MD5 digest of schema plus row prefix
    """
    # Schema part: (name, dtype) pairs sorted by column name.
    schema = lf.collect_schema()
    schema_str = str(sorted(schema.items()))

    # Data part: JSON dump of the leading rows.
    # NOTE(review): the pandas round-trip presumably requires pandas to be
    # installed alongside polars - confirm it is a declared dependency.
    sample = lf.head(sample_size).collect()
    data_str = sample.to_pandas().to_json()

    combined = f"{schema_str}:{data_str}"
    # MD5 serves purely as a fingerprint here. Declaring that keeps the call
    # working on FIPS-restricted Python builds, where plain md5() raises.
    digest = hashlib.md5(combined.encode(), usedforsecurity=False)
    return digest.hexdigest()[:16]