truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877)
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,1515 @@
1
+ """Enterprise audit logging system for Truthound.
2
+
3
+ This module extends the base audit system with enterprise features:
4
+ - Full operation audit trail
5
+ - Additional storage backends (Elasticsearch, S3, Kafka)
6
+ - Compliance reporting
7
+ - Retention policies
8
+ - Integration with correlation context
9
+
10
+ Architecture:
11
+ EnterpriseAuditLogger
12
+ |
13
+ +---> Filter Pipeline
14
+ +---> Processor Pipeline
15
+ |
16
+ v
17
+ AuditStorage[]
18
+ |
19
+ +---> SQLiteAuditStorage (local)
20
+ +---> ElasticsearchAuditStorage
21
+ +---> S3AuditStorage
22
+ +---> KafkaAuditStorage
23
+ |
24
+ v
25
+ ComplianceReporter (generates reports)
26
+
27
+ Usage:
28
+ >>> from truthound.infrastructure.audit import (
29
+ ... get_audit_logger, configure_audit,
30
+ ... EnterpriseAuditConfig,
31
+ ... )
32
+ >>>
33
+ >>> # Configure for production
34
+ >>> configure_audit(
35
+ ... service_name="truthound",
36
+ ... environment="production",
37
+ ... storage_backends=["sqlite", "elasticsearch"],
38
+ ... elasticsearch_url="http://elk:9200",
39
+ ... )
40
+ >>>
41
+ >>> # Log audit event
42
+ >>> logger = get_audit_logger()
43
+ >>> logger.log_operation(
44
+ ... operation="validation",
45
+ ... resource="dataset:users",
46
+ ... outcome="success",
47
+ ... details={"rows": 10000, "issues": 5},
48
+ ... )
49
+ """
50
+
51
+ from __future__ import annotations
52
+
53
+ import json
54
+ import os
55
+ import queue
56
+ import socket
57
+ import threading
58
+ import time
59
+ import uuid
60
+ from abc import ABC, abstractmethod
61
+ from concurrent.futures import ThreadPoolExecutor
62
+ from contextlib import contextmanager
63
+ from dataclasses import dataclass, field
64
+ from datetime import datetime, timedelta, timezone
65
+ from enum import Enum
66
+ from pathlib import Path
67
+ from typing import Any, Callable, Iterator
68
+
69
+ # Re-export and extend base audit components
70
+ from truthound.audit import (
71
+ AuditEventType,
72
+ AuditSeverity,
73
+ AuditOutcome,
74
+ AuditCategory,
75
+ AuditActor,
76
+ AuditResource,
77
+ AuditContext,
78
+ AuditEvent,
79
+ AuditConfig,
80
+ AuditStorage,
81
+ AuditFormatter,
82
+ AuditFilter,
83
+ AuditProcessor,
84
+ AuditEventBuilder,
85
+ AuditLogger,
86
+ MemoryAuditStorage,
87
+ SQLiteAuditStorage,
88
+ JSONFormatter,
89
+ SeverityFilter,
90
+ PrivacyProcessor,
91
+ ChecksumProcessor,
92
+ )
93
+
94
+ # Import correlation context from logging
95
+ from truthound.infrastructure.logging import (
96
+ CorrelationContext,
97
+ get_correlation_id,
98
+ )
99
+
100
+
101
+ # =============================================================================
102
+ # Extended Configuration
103
+ # =============================================================================
104
+
105
+
106
@dataclass
class EnterpriseAuditConfig(AuditConfig):
    """Audit configuration carrying the enterprise-only settings.

    Adds backend selection, per-backend connection settings, compliance
    flags, retention/archival options and write-performance tuning on top of
    the fields inherited from ``AuditConfig``.

    Example:
        >>> config = EnterpriseAuditConfig(
        ...     service_name="truthound",
        ...     environment="production",
        ...     storage_backends=["sqlite", "elasticsearch"],
        ...     elasticsearch_url="http://elk:9200",
        ...     s3_bucket="audit-logs",
        ...     retention_days=365,
        ...     compliance_standards=["SOC2", "GDPR"],
        ... )
    """

    # --- Storage backend selection ------------------------------------------
    # Recognised names: memory, sqlite, elasticsearch, s3, kafka.
    storage_backends: list[str] = field(default_factory=lambda: ["memory"])

    # --- Elasticsearch ------------------------------------------------------
    elasticsearch_url: str = ""
    elasticsearch_index_prefix: str = "truthound-audit"
    elasticsearch_username: str = ""
    elasticsearch_password: str = ""

    # --- S3 -----------------------------------------------------------------
    s3_bucket: str = ""
    s3_prefix: str = "audit/"
    s3_region: str = ""

    # --- Kafka --------------------------------------------------------------
    kafka_bootstrap_servers: str = ""
    kafka_topic: str = "truthound-audit"

    # --- Compliance ---------------------------------------------------------
    # Standard identifiers such as SOC2, GDPR, HIPAA.
    compliance_standards: list[str] = field(default_factory=list)
    require_checksum: bool = True
    require_signing: bool = False

    # --- Retention ----------------------------------------------------------
    # One of: default, compliance, custom.
    retention_policy: str = "default"
    archive_to_cold_storage: bool = False
    cold_storage_after_days: int = 90

    # --- Write performance --------------------------------------------------
    async_write: bool = True
    batch_size: int = 100
    flush_interval: float = 5.0

    @classmethod
    def production(cls, service_name: str) -> "EnterpriseAuditConfig":
        """Build the preset used for production deployments.

        Args:
            service_name: Name recorded as the emitting service.

        Returns:
            A config with auditing enabled, SQLite storage, checksums,
            asynchronous writes and a 365-day retention period.
        """
        settings: dict[str, Any] = {
            "enabled": True,
            "service_name": service_name,
            "environment": "production",
            "storage_backends": ["sqlite"],
            "require_checksum": True,
            "async_write": True,
            "retention_days": 365,
        }
        return cls(**settings)

    @classmethod
    def compliance(
        cls,
        service_name: str,
        standards: list[str],
    ) -> "EnterpriseAuditConfig":
        """Build the preset used when compliance standards must be met.

        Args:
            service_name: Name recorded as the emitting service.
            standards: Compliance standards the deployment is subject to.

        Returns:
            A config with SQLite and S3 storage, checksums and signing
            required, the ``compliance`` retention policy, a 2555-day
            retention period and cold-storage archival enabled.
        """
        settings: dict[str, Any] = {
            "enabled": True,
            "service_name": service_name,
            "environment": "production",
            "storage_backends": ["sqlite", "s3"],
            "require_checksum": True,
            "require_signing": True,
            "compliance_standards": standards,
            "retention_policy": "compliance",
            "retention_days": 2555,  # 7 years
            "archive_to_cold_storage": True,
        }
        return cls(**settings)
189
+
190
+
191
+ # =============================================================================
192
+ # Additional Storage Backends
193
+ # =============================================================================
194
+
195
+
196
class ElasticsearchAuditStorage(AuditStorage):
    """Elasticsearch audit storage backend.

    Stores audit events in Elasticsearch for search and analysis.

    Events are buffered in memory and sent to the ``_bulk`` endpoint from a
    small worker pool, either once ``batch_size`` events have accumulated or
    when the background flusher sees that ``flush_interval`` seconds have
    elapsed since the last flush. Documents are written to daily indices
    named ``<index_prefix>-YYYY.MM.DD``.

    All HTTP traffic is best-effort: failures are logged and never raised to
    the caller. Read-side methods fall back to ``None`` / ``[]`` / ``0``.
    """

    def __init__(
        self,
        url: str,
        *,
        index_prefix: str = "truthound-audit",
        username: str = "",
        password: str = "",
        api_key: str = "",
        batch_size: int = 100,
        flush_interval: float = 5.0,
    ) -> None:
        """Initialize Elasticsearch storage.

        Args:
            url: Elasticsearch URL.
            index_prefix: Index name prefix.
            username: Basic auth username.
            password: Basic auth password.
            api_key: API key for auth.
            batch_size: Batch size for bulk indexing.
            flush_interval: Flush interval in seconds.
        """
        self._url = url.rstrip("/")
        self._index_prefix = index_prefix
        self._username = username
        self._password = password
        self._api_key = api_key
        self._batch_size = batch_size
        self._flush_interval = flush_interval

        self._buffer: list[AuditEvent] = []
        self._lock = threading.Lock()
        self._last_flush = time.time()
        self._executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="es-audit")
        self._running = True
        # Lets close() interrupt the flusher's one-second tick immediately.
        self._stop_event = threading.Event()

        # Start background flusher
        self._flush_thread = threading.Thread(
            target=self._background_flush,
            daemon=True,
            name="es-audit-flusher",
        )
        self._flush_thread.start()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _logger() -> Any:
        """Return the stdlib logger used to report backend failures."""
        import logging

        return logging.getLogger(__name__)

    def _auth_headers(self) -> dict[str, str]:
        """Build the ``Authorization`` header for the configured credentials.

        Basic credentials win over the API key when both are configured,
        which is the precedence the bulk-index path has always had.
        """
        if self._username and self._password:
            import base64

            credentials = base64.b64encode(
                f"{self._username}:{self._password}".encode()
            ).decode()
            return {"Authorization": f"Basic {credentials}"}
        if self._api_key:
            return {"Authorization": f"ApiKey {self._api_key}"}
        return {}

    def _request_json(
        self,
        path: str,
        body: bytes,
        *,
        content_type: str = "application/json",
        timeout: float = 30,
    ) -> Any:
        """POST ``body`` to ``<url>/<path>`` and return the decoded JSON reply.

        Every endpoint goes through here so that authentication is applied
        uniformly. Network and HTTP errors propagate to the caller, which is
        responsible for the best-effort fallback.
        """
        import urllib.request

        headers = {"Content-Type": content_type}
        headers.update(self._auth_headers())
        request = urllib.request.Request(
            f"{self._url}/{path}",
            data=body,
            headers=headers,
            method="POST",
        )
        with urllib.request.urlopen(request, timeout=timeout) as response:
            raw = response.read()
        return json.loads(raw.decode("utf-8")) if raw else {}

    @staticmethod
    def _time_and_type_clauses(
        start_time: datetime | None,
        end_time: datetime | None,
        event_types: list[AuditEventType] | None,
    ) -> list[dict[str, Any]]:
        """Build the ``must`` clauses shared by ``query`` and ``count``."""
        clauses: list[dict[str, Any]] = []

        if start_time or end_time:
            bounds: dict[str, str] = {}
            if start_time:
                bounds["gte"] = start_time.isoformat()
            if end_time:
                bounds["lte"] = end_time.isoformat()
            clauses.append({"range": {"timestamp": bounds}})

        if event_types:
            clauses.append({"terms": {"event_type": [t.value for t in event_types]}})

        return clauses

    @staticmethod
    def _wrap_query(must_clauses: list[dict[str, Any]]) -> dict[str, Any]:
        """Return a bool/must query, or ``match_all`` when there are no filters."""
        if must_clauses:
            return {"bool": {"must": must_clauses}}
        return {"match_all": {}}

    # ------------------------------------------------------------------
    # Write path
    # ------------------------------------------------------------------

    def write(self, event: AuditEvent) -> None:
        """Buffer event for bulk indexing."""
        with self._lock:
            self._buffer.append(event)
            if len(self._buffer) >= self._batch_size:
                self._flush_buffer()

    def write_batch(self, events: list[AuditEvent]) -> None:
        """Write multiple events."""
        with self._lock:
            self._buffer.extend(events)
            if len(self._buffer) >= self._batch_size:
                self._flush_buffer()

    def _background_flush(self) -> None:
        """Background flush loop.

        Wakes roughly once per second and flushes when the buffer is
        non-empty and ``flush_interval`` has elapsed since the last flush.
        """
        while self._running:
            # Event.wait is an interruptible sleep: it returns True as soon
            # as close() sets the event.
            if self._stop_event.wait(1):
                break
            with self._lock:
                if (
                    self._buffer
                    and time.time() - self._last_flush >= self._flush_interval
                ):
                    self._flush_buffer()

    def _flush_buffer(self) -> None:
        """Flush buffered events to Elasticsearch.

        Must be called with ``self._lock`` held. The HTTP request itself runs
        on the executor so the lock is only held for the buffer swap.
        """
        if not self._buffer:
            return

        events = self._buffer.copy()
        self._buffer.clear()
        self._last_flush = time.time()
        self._executor.submit(self._bulk_index, events)

    def _get_index_name(self, timestamp: datetime) -> str:
        """Get index name for timestamp."""
        suffix = timestamp.strftime("%Y.%m.%d")
        return f"{self._index_prefix}-{suffix}"

    def _bulk_index(self, events: list[AuditEvent]) -> None:
        """Bulk index events to Elasticsearch.

        Runs on the executor. Failures are logged rather than raised; the
        batch is not retried.
        """
        try:
            lines = []
            for event in events:
                index_name = self._get_index_name(event.timestamp)
                lines.append(
                    json.dumps({"index": {"_index": index_name, "_id": event.id}})
                )
                lines.append(json.dumps(event.to_dict(), default=str))
            body = "\n".join(lines) + "\n"

            result = self._request_json(
                "_bulk",
                body.encode("utf-8"),
                content_type="application/x-ndjson",
            )

            # The bulk endpoint answers 200 even when individual items were
            # rejected; those are only visible through the "errors" flag.
            if isinstance(result, dict) and result.get("errors"):
                rejected = sum(
                    1
                    for item in result.get("items", [])
                    if item.get("index", {}).get("error")
                )
                self._logger().warning(
                    "Elasticsearch rejected %d of %d audit events",
                    rejected,
                    len(events),
                )

        except Exception as exc:
            self._logger().warning(
                "Failed to bulk index %d audit events: %s", len(events), exc
            )

    # ------------------------------------------------------------------
    # Read path
    # ------------------------------------------------------------------

    def read(self, event_id: str) -> AuditEvent | None:
        """Read event by ID (searches all indices).

        Uses an ``ids`` query against ``<index_prefix>-*`` because the
        single-document get endpoint requires one concrete index and the
        event's daily index is not known from its ID.

        Returns:
            The event, or ``None`` when it is not found or the lookup fails.
        """
        try:
            payload = {"query": {"ids": {"values": [event_id]}}, "size": 1}
            data = self._request_json(
                f"{self._index_prefix}-*/_search",
                json.dumps(payload).encode("utf-8"),
            )
            hits = data.get("hits", {}).get("hits", [])
            if hits:
                return AuditEvent.from_dict(hits[0]["_source"])

        except Exception as exc:
            self._logger().warning("Failed to read audit event %s: %s", event_id, exc)
        return None

    def query(
        self,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
        event_types: list[AuditEventType] | None = None,
        actor_id: str | None = None,
        resource_id: str | None = None,
        outcome: AuditOutcome | None = None,
        limit: int = 100,
        offset: int = 0,
    ) -> list[AuditEvent]:
        """Query audit events.

        All supplied filters are AND-ed together; results are sorted by
        timestamp, newest first.

        Returns:
            Matching events, or an empty list when the search fails.
        """
        try:
            must_clauses = self._time_and_type_clauses(start_time, end_time, event_types)

            if actor_id:
                must_clauses.append({"term": {"actor.id": actor_id}})

            if resource_id:
                must_clauses.append({"term": {"resource.id": resource_id}})

            if outcome:
                must_clauses.append({"term": {"outcome": outcome.value}})

            payload = {
                "query": self._wrap_query(must_clauses),
                "sort": [{"timestamp": "desc"}],
                "from": offset,
                "size": limit,
            }

            data = self._request_json(
                f"{self._index_prefix}-*/_search",
                json.dumps(payload).encode("utf-8"),
            )
            return [
                AuditEvent.from_dict(hit["_source"])
                for hit in data.get("hits", {}).get("hits", [])
            ]

        except Exception as exc:
            self._logger().warning("Audit event query failed: %s", exc)
            return []

    def count(
        self,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
        event_types: list[AuditEventType] | None = None,
    ) -> int:
        """Count matching events.

        Returns:
            Number of matching events, or ``0`` when the request fails.
        """
        try:
            must_clauses = self._time_and_type_clauses(start_time, end_time, event_types)
            payload = {"query": self._wrap_query(must_clauses)}

            data = self._request_json(
                f"{self._index_prefix}-*/_count",
                json.dumps(payload).encode("utf-8"),
            )
            return data.get("count", 0)

        except Exception as exc:
            self._logger().warning("Audit event count failed: %s", exc)
            return 0

    def delete_before(self, before: datetime) -> int:
        """Delete events before timestamp.

        Returns:
            Number of deleted documents, or ``0`` when the request fails.
        """
        try:
            payload = {
                "query": {
                    "range": {"timestamp": {"lt": before.isoformat()}}
                }
            }

            data = self._request_json(
                f"{self._index_prefix}-*/_delete_by_query",
                json.dumps(payload).encode("utf-8"),
                timeout=60,
            )
            return data.get("deleted", 0)

        except Exception as exc:
            self._logger().warning("Audit event deletion failed: %s", exc)
            return 0

    # ------------------------------------------------------------------
    # Lifecycle
    # ------------------------------------------------------------------

    def flush(self) -> None:
        """Flush buffered events."""
        with self._lock:
            self._flush_buffer()

    def close(self) -> None:
        """Close storage.

        Stops the flusher thread first so it cannot submit work to an
        executor that is already shut down, then flushes whatever is still
        buffered and waits for in-flight bulk requests to finish.
        """
        self._running = False
        self._stop_event.set()
        if self._flush_thread is not threading.current_thread():
            self._flush_thread.join(timeout=5.0)
        self.flush()
        self._executor.shutdown(wait=True)
495
+
496
+
497
class S3AuditStorage(AuditStorage):
    """S3 audit storage backend.

    Events are buffered in memory and uploaded as JSON files under
    ``<prefix>/YYYY/MM/DD/`` (UTC date of upload). Uploads run on a small
    thread pool; a daemon thread flushes the buffer once ``flush_interval``
    seconds have passed since the last flush.
    """

    def __init__(
        self,
        bucket: str,
        *,
        prefix: str = "audit/",
        region: str = "",
        batch_size: int = 1000,
        flush_interval: float = 60.0,
    ) -> None:
        """Initialize S3 storage.

        Args:
            bucket: S3 bucket name.
            prefix: Key prefix; normalized to end with exactly one "/".
            region: AWS region. Falls back to the ``AWS_REGION`` environment
                variable, then to "us-east-1".
            batch_size: Maximum events per uploaded file. Values below 1 are
                treated as 1.
            flush_interval: Seconds after which buffered events are flushed
                by the background thread.
        """
        self._bucket = bucket
        self._prefix = prefix.rstrip("/") + "/"
        self._region = region or os.getenv("AWS_REGION", "us-east-1")
        # Clamp: a zero/negative size would make the "buffer is full" test
        # permanently true and give an invalid chunk step when flushing.
        self._batch_size = max(1, batch_size)
        self._flush_interval = flush_interval

        self._buffer: list[AuditEvent] = []
        self._lock = threading.Lock()
        self._last_flush = time.time()
        self._executor = ThreadPoolExecutor(max_workers=2, thread_name_prefix="s3-audit")
        self._running = True

        # Start background flusher
        self._flush_thread = threading.Thread(
            target=self._background_flush,
            daemon=True,
            name="s3-audit-flusher",
        )
        self._flush_thread.start()

    @staticmethod
    def _report_failure(action: str) -> None:
        """Log a swallowed failure so dropped audit work leaves a trace."""
        import logging

        logging.getLogger(__name__).warning(
            "S3 audit storage: %s failed", action, exc_info=True
        )

    @staticmethod
    def _utc_date(moment: datetime):
        """Return the calendar date of ``moment``, converted to UTC if aware.

        Naive datetimes are used as-is. Object keys are built from the UTC
        upload date, so day prefixes are compared in UTC.
        """
        if moment.tzinfo is not None:
            moment = moment.astimezone(timezone.utc)
        return moment.date()

    def write(self, event: AuditEvent) -> None:
        """Buffer one event; flush when the buffer reaches ``batch_size``."""
        with self._lock:
            self._buffer.append(event)
            if len(self._buffer) >= self._batch_size:
                self._flush_buffer()

    def write_batch(self, events: list[AuditEvent]) -> None:
        """Buffer several events; flush when the buffer reaches ``batch_size``."""
        with self._lock:
            self._buffer.extend(events)
            # One call is enough: _flush_buffer drains the whole buffer.
            if len(self._buffer) >= self._batch_size:
                self._flush_buffer()

    def _background_flush(self) -> None:
        """Background loop: every 10 seconds, flush if the interval elapsed."""
        while self._running:
            time.sleep(10)
            with self._lock:
                if not self._running:
                    # close() ran while we slept; it performs the final flush
                    # and shuts the executor down, so do not submit more work.
                    break
                if (
                    self._buffer
                    and time.time() - self._last_flush >= self._flush_interval
                ):
                    self._flush_buffer()

    def _flush_buffer(self) -> None:
        """Drain the buffer and queue uploads. Caller must hold ``_lock``."""
        if not self._buffer:
            return

        pending, self._buffer = self._buffer, []
        self._last_flush = time.time()
        # Split so no uploaded file exceeds batch_size events.
        step = self._batch_size
        for begin in range(0, len(pending), step):
            self._executor.submit(self._upload_batch, pending[begin:begin + step])

    def _upload_batch(self, events: list[AuditEvent]) -> None:
        """Upload one batch of events to S3 as a single JSON object.

        Failures are logged and otherwise ignored; the events of a failed
        batch are not retried.
        """
        try:
            import boto3

            s3 = boto3.client("s3", region_name=self._region)

            # Key layout: <prefix>YYYY/MM/DD/audit_HHMMSS_<8 hex chars>.json
            now = datetime.now(timezone.utc)
            date_path = now.strftime("%Y/%m/%d")
            file_id = uuid.uuid4().hex[:8]
            key = f"{self._prefix}{date_path}/audit_{now.strftime('%H%M%S')}_{file_id}.json"

            data = {
                "metadata": {
                    "created_at": now.isoformat(),
                    "event_count": len(events),
                    "hostname": socket.gethostname(),
                },
                "events": [event.to_dict() for event in events],
            }

            s3.put_object(
                Bucket=self._bucket,
                Key=key,
                # default=str stringifies any value json cannot encode natively.
                Body=json.dumps(data, default=str).encode("utf-8"),
                ContentType="application/json",
            )

        except Exception:
            self._report_failure(f"upload of {len(events)} event(s)")

    def read(self, event_id: str) -> AuditEvent | None:
        """Read event by ID. Not supported here; always returns None."""
        return None  # S3 is write-optimized, use query for reads

    def query(
        self,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
        event_types: list[AuditEventType] | None = None,
        actor_id: str | None = None,
        resource_id: str | None = None,
        outcome: AuditOutcome | None = None,
        limit: int = 100,
        offset: int = 0,
    ) -> list[AuditEvent]:
        """Query audit events by downloading and filtering the stored files.

        Every object under each day prefix from ``start_time`` to
        ``end_time`` (inclusive, by calendar day) is fetched and filtered
        client-side. The time range is applied at day granularity only.

        Args:
            start_time: Start of range; defaults to 7 days before now (UTC).
            end_time: End of range; defaults to now (UTC).
            event_types: Keep only these event types, if given.
            actor_id: Keep only events whose actor has this id, if given.
            resource_id: Keep only events whose resource has this id, if given.
            outcome: Keep only events with this outcome, if given.
            limit: Maximum number of events to return.
            offset: Number of matching events to skip.

        Returns:
            Matching events, or an empty list if any error occurs.
        """
        try:
            import boto3

            s3 = boto3.client("s3", region_name=self._region)

            now = datetime.now(timezone.utc)
            # Walk whole calendar days so the final day is never skipped,
            # whatever the time-of-day of the two bounds.
            day = self._utc_date(start_time or now - timedelta(days=7))
            last_day = self._utc_date(end_time or now)

            wanted = offset + limit
            events = []
            paginator = s3.get_paginator("list_objects_v2")

            while day <= last_day:
                prefix = f"{self._prefix}{day.strftime('%Y/%m/%d')}/"

                for page in paginator.paginate(Bucket=self._bucket, Prefix=prefix):
                    for obj in page.get("Contents", []):
                        response = s3.get_object(
                            Bucket=self._bucket,
                            Key=obj["Key"],
                        )
                        data = json.loads(response["Body"].read().decode("utf-8"))

                        for event_dict in data.get("events", []):
                            event = AuditEvent.from_dict(event_dict)

                            if event_types and event.event_type not in event_types:
                                continue
                            if actor_id and (not event.actor or event.actor.id != actor_id):
                                continue
                            if resource_id and (not event.resource or event.resource.id != resource_id):
                                continue
                            if outcome and event.outcome != outcome:
                                continue

                            events.append(event)
                            # Stop downloading as soon as the page is full.
                            if len(events) >= wanted:
                                return events[offset:wanted]

                day += timedelta(days=1)

            return events[offset:wanted]

        except Exception:
            self._report_failure("query")
            return []

    def count(
        self,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
        event_types: list[AuditEventType] | None = None,
    ) -> int:
        """Count matching events via ``query``; the result is capped at 100000."""
        events = self.query(
            start_time=start_time,
            end_time=end_time,
            event_types=event_types,
            limit=100000,
        )
        return len(events)

    def delete_before(self, before: datetime) -> int:
        """Delete stored files from days strictly before ``before``'s date.

        Scans the day prefixes of the 10 years preceding the cutoff date.

        Returns:
            Number of S3 objects (files, not individual events) submitted
            for deletion, or 0 if any error occurs.
        """
        try:
            import boto3

            s3 = boto3.client("s3", region_name=self._region)
            deleted = 0

            paginator = s3.get_paginator("list_objects_v2")
            cutoff_day = self._utc_date(before)
            day = cutoff_day - timedelta(days=365 * 10)  # Look back 10 years

            while day < cutoff_day:
                prefix = f"{self._prefix}{day.strftime('%Y/%m/%d')}/"

                for page in paginator.paginate(Bucket=self._bucket, Prefix=prefix):
                    objects = page.get("Contents", [])
                    if objects:
                        s3.delete_objects(
                            Bucket=self._bucket,
                            Delete={"Objects": [{"Key": obj["Key"]} for obj in objects]},
                        )
                        deleted += len(objects)

                day += timedelta(days=1)

            return deleted

        except Exception:
            self._report_failure("delete_before")
            return 0

    def flush(self) -> None:
        """Flush buffered events."""
        with self._lock:
            self._flush_buffer()

    def close(self) -> None:
        """Stop the flusher, flush remaining events and wait for uploads."""
        self._running = False
        self.flush()
        self._executor.shutdown(wait=True)
732
+
733
+
734
class KafkaAuditStorage(AuditStorage):
    """Kafka audit storage backend.

    Publishes audit events to a Kafka topic. This backend is write-only:
    ``read``, ``query``, ``count`` and ``delete_before`` return empty results.
    """

    def __init__(
        self,
        bootstrap_servers: str,
        *,
        topic: str = "truthound-audit",
        batch_size: int = 100,
        linger_ms: int = 5,
    ) -> None:
        """Initialize Kafka storage.

        The producer itself is created lazily on first write.

        Args:
            bootstrap_servers: Comma-separated Kafka bootstrap servers.
            topic: Kafka topic.
            batch_size: Batch size; multiplied by 1024 and passed to the
                producer as its ``batch_size``.
            linger_ms: Linger time for batching.
        """
        self._bootstrap_servers = bootstrap_servers
        self._topic = topic
        self._batch_size = batch_size
        self._linger_ms = linger_ms
        self._producer = None
        self._lock = threading.Lock()

    def _get_producer(self) -> Any:
        """Get or create the Kafka producer.

        Creation is guarded by ``_lock`` so concurrent first writes cannot
        each build their own producer.

        Raises:
            RuntimeError: If the ``kafka`` package cannot be imported.
        """
        producer = self._producer
        if producer is not None:
            return producer

        with self._lock:
            # Re-check: another thread may have created it while we waited.
            if self._producer is None:
                try:
                    from kafka import KafkaProducer
                except ImportError as err:
                    raise RuntimeError("kafka-python not installed") from err

                self._producer = KafkaProducer(
                    bootstrap_servers=self._bootstrap_servers.split(","),
                    value_serializer=lambda v: json.dumps(v, default=str).encode("utf-8"),
                    batch_size=self._batch_size * 1024,
                    linger_ms=self._linger_ms,
                )
            return self._producer

    def write(self, event: AuditEvent) -> None:
        """Send one event to Kafka, keyed by event id. Errors are ignored."""
        try:
            producer = self._get_producer()
            producer.send(
                self._topic,
                value=event.to_dict(),
                key=event.id.encode("utf-8"),
            )
        except Exception:
            # Best effort: audit delivery must not break the caller.
            pass

    def write_batch(self, events: list[AuditEvent]) -> None:
        """Send several events to Kafka, then flush. Errors are ignored."""
        try:
            producer = self._get_producer()
            for event in events:
                producer.send(
                    self._topic,
                    value=event.to_dict(),
                    key=event.id.encode("utf-8"),
                )
            producer.flush()
        except Exception:
            # Best effort: audit delivery must not break the caller.
            pass

    def read(self, event_id: str) -> AuditEvent | None:
        """Read not supported for Kafka (write-only)."""
        return None

    def query(
        self,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
        event_types: list[AuditEventType] | None = None,
        actor_id: str | None = None,
        resource_id: str | None = None,
        outcome: AuditOutcome | None = None,
        limit: int = 100,
        offset: int = 0,
    ) -> list[AuditEvent]:
        """Query not supported for Kafka (write-only); always returns []."""
        return []

    def count(
        self,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
        event_types: list[AuditEventType] | None = None,
    ) -> int:
        """Count not supported for Kafka; always returns 0."""
        return 0

    def delete_before(self, before: datetime) -> int:
        """Delete not supported for Kafka; always returns 0."""
        return 0

    def flush(self) -> None:
        """Flush the Kafka producer if one has been created."""
        # Use a local reference so a concurrent close() cannot null it
        # between the check and the call.
        producer = self._producer
        if producer is not None:
            producer.flush()

    def close(self) -> None:
        """Close the Kafka producer if one has been created."""
        # Detach first so the reference is cleared even if close() raises.
        with self._lock:
            producer, self._producer = self._producer, None
        if producer is not None:
            producer.close()
847
+
848
+
849
+ # =============================================================================
850
+ # Retention Policy
851
+ # =============================================================================
852
+
853
+
854
@dataclass
class RetentionPolicy:
    """Settings describing how long audit logs are kept and when they are archived.

    Example:
        >>> policy = RetentionPolicy(
        ...     name="compliance",
        ...     retention_days=2555,  # 7 years
        ...     archive_after_days=90,
        ...     archive_storage="s3",
        ... )
    """

    # Policy identifier.
    name: str
    # Days to retain events.
    retention_days: int = 365
    # Age in days after which events are archived.
    archive_after_days: int = 90
    # Archive target, e.g. "s3" or "glacier"; empty when unset.
    archive_storage: str = ""  # s3, glacier
    # Whether events are deleted once archived.
    delete_after_archive: bool = False

    @classmethod
    def default(cls) -> "RetentionPolicy":
        """Build the "default" preset: 365 days of retention."""
        return cls("default", retention_days=365)

    @classmethod
    def compliance_soc2(cls) -> "RetentionPolicy":
        """Build the "soc2" preset: 2555 days (7 years), archived to s3 after 90 days."""
        settings = {
            "retention_days": 2555,
            "archive_after_days": 90,
            "archive_storage": "s3",
        }
        return cls("soc2", **settings)

    @classmethod
    def compliance_gdpr(cls) -> "RetentionPolicy":
        """Build the "gdpr" preset: 365 days of retention, archived after 30 days."""
        # Minimize data retention
        return cls("gdpr", retention_days=365, archive_after_days=30)

    @classmethod
    def compliance_hipaa(cls) -> "RetentionPolicy":
        """Build the "hipaa" preset: 2190 days (6 years), archived to s3 after 90 days."""
        settings = {
            "retention_days": 2190,
            "archive_after_days": 90,
            "archive_storage": "s3",
        }
        return cls("hipaa", **settings)
906
+
907
+
908
+ # =============================================================================
909
+ # Compliance Reporter
910
+ # =============================================================================
911
+
912
+
913
class ComplianceReporter:
    """Build summary/compliance reports from events held in an audit storage.

    Example:
        >>> reporter = ComplianceReporter(storage)
        >>> report = reporter.generate_report(
        ...     start_date=datetime(2024, 1, 1),
        ...     end_date=datetime(2024, 12, 31),
        ...     standard="SOC2",
        ... )
    """

    def __init__(self, storage: AuditStorage) -> None:
        """Remember the storage backend that reports are read from.

        Args:
            storage: Audit storage backend.
        """
        self._storage = storage

    def generate_report(
        self,
        start_date: datetime,
        end_date: datetime,
        *,
        standard: str = "",
        include_details: bool = False,
    ) -> dict[str, Any]:
        """Produce a report for the given period.

        At most 100000 events are fetched from the storage.

        Args:
            start_date: Report start date.
            end_date: Report end date.
            standard: Compliance standard name; "SOC2" and "GDPR"
                (case-insensitive) get dedicated control sections.
            include_details: When true, add the first 1000 events as dicts
                under the "events" key.

        Returns:
            Report dictionary with "metadata", "summary", "breakdown",
            "security" and "compliance" sections.
        """
        events = self._storage.query(
            start_time=start_date,
            end_time=end_date,
            limit=100000,
        )

        # Tallies keyed by enum value (or actor id).
        type_counts: dict[str, int] = {}
        outcome_counts: dict[str, int] = {}
        severity_counts: dict[str, int] = {}
        actor_counts: dict[str, int] = {}

        def bump(tally: dict[str, int], label: str) -> None:
            tally[label] = tally.get(label, 0) + 1

        for ev in events:
            bump(type_counts, ev.event_type.value)
            bump(outcome_counts, ev.outcome.value)
            bump(severity_counts, ev.severity.value)
            if ev.actor:
                bump(actor_counts, ev.actor.id)

        # Security-relevant events
        security_total = sum(
            1
            for ev in events
            if ev.event_type in (
                AuditEventType.LOGIN_FAILED,
                AuditEventType.ACCESS_DENIED,
                AuditEventType.SECURITY_ALERT,
                AuditEventType.SUSPICIOUS_ACTIVITY,
            )
        )

        # Failed operations
        failed_total = sum(1 for ev in events if ev.outcome == AuditOutcome.FAILURE)

        metadata = {
            "generated_at": datetime.now(timezone.utc).isoformat(),
            "period": {
                "start": start_date.isoformat(),
                "end": end_date.isoformat(),
            },
            "standard": standard,
        }
        summary = {
            "total_events": len(events),
            "security_events": security_total,
            "failed_operations": failed_total,
            "unique_actors": len(actor_counts),
        }
        breakdown = {
            "by_type": type_counts,
            "by_outcome": outcome_counts,
            "by_severity": severity_counts,
        }
        security = {
            "login_failures": type_counts.get("login_failed", 0),
            "access_denials": type_counts.get("access_denied", 0),
            "alerts": type_counts.get("security_alert", 0),
        }

        report = {
            "metadata": metadata,
            "summary": summary,
            "breakdown": breakdown,
            "security": security,
            "compliance": self._generate_compliance_section(events, standard),
        }

        if include_details:
            # Detail list is capped at the first 1000 events.
            report["events"] = [ev.to_dict() for ev in events[:1000]]

        return report

    def _generate_compliance_section(
        self,
        events: list[AuditEvent],
        standard: str,
    ) -> dict[str, Any]:
        """Build the per-standard controls; unknown standards get no controls."""
        normalized = standard.upper()

        if normalized == "SOC2":
            controls: dict[str, Any] = {
                "CC6.1": self._check_access_controls(events),
                "CC6.2": self._check_authentication(events),
                "CC6.3": self._check_authorization(events),
                "CC7.1": self._check_system_monitoring(events),
            }
        elif normalized == "GDPR":
            controls = {
                "data_access": self._check_data_access(events),
                "consent": self._check_consent_events(events),
                "right_to_be_forgotten": self._check_deletion_events(events),
            }
        else:
            controls = {}

        return {"standard": standard, "controls": controls}

    def _check_access_controls(self, events: list[AuditEvent]) -> dict[str, Any]:
        """Count login, logout and failed-login events."""
        total = sum(
            1
            for ev in events
            if ev.event_type in (
                AuditEventType.LOGIN,
                AuditEventType.LOGOUT,
                AuditEventType.LOGIN_FAILED,
            )
        )
        return {
            "total_access_events": total,
            "logged": True,
            "status": "compliant",
        }

    def _check_authentication(self, events: list[AuditEvent]) -> dict[str, Any]:
        """Count events in the AUTHENTICATION category."""
        total = sum(
            1 for ev in events if ev.category == AuditCategory.AUTHENTICATION
        )
        return {
            "total_auth_events": total,
            "logged": True,
            "status": "compliant",
        }

    def _check_authorization(self, events: list[AuditEvent]) -> dict[str, Any]:
        """Count events in the AUTHORIZATION category."""
        total = sum(
            1 for ev in events if ev.category == AuditCategory.AUTHORIZATION
        )
        return {
            "total_authz_events": total,
            "logged": True,
            "status": "compliant",
        }

    def _check_system_monitoring(self, events: list[AuditEvent]) -> dict[str, Any]:
        """Report whether any events were captured in the period."""
        captured = len(events) > 0
        return {
            "audit_enabled": True,
            "events_captured": captured,
            "status": "compliant",
        }

    def _check_data_access(self, events: list[AuditEvent]) -> dict[str, Any]:
        """Count events in the DATA_ACCESS category (GDPR section)."""
        total = sum(
            1 for ev in events if ev.category == AuditCategory.DATA_ACCESS
        )
        return {
            "total_data_access_events": total,
            "logged": True,
        }

    def _check_consent_events(self, events: list[AuditEvent]) -> dict[str, Any]:
        """Return a fixed consent-tracking marker; ``events`` is not inspected."""
        return {"consent_tracking": "implemented"}

    def _check_deletion_events(self, events: list[AuditEvent]) -> dict[str, Any]:
        """Count DELETE events (GDPR right-to-be-forgotten section)."""
        total = sum(1 for ev in events if ev.event_type == AuditEventType.DELETE)
        return {
            "deletion_events": total,
            "logged": True,
        }
1113
+
1114
+
1115
+ # =============================================================================
1116
+ # Enterprise Audit Logger
1117
+ # =============================================================================
1118
+
1119
+
1120
class EnterpriseAuditLogger:
    """Enterprise-grade audit logger.

    Fans each event out to one or more storage backends after stamping it
    with correlation/service context and running the configured processors.

    Example:
        >>> logger = EnterpriseAuditLogger(config)
        >>>
        >>> # Log operation
        >>> logger.log_operation(
        ...     operation="validate_dataset",
        ...     resource="dataset:users",
        ...     outcome="success",
        ...     details={"rows": 10000, "issues": 5},
        ... )
        >>>
        >>> # Log with context
        >>> with logger.operation_context("checkpoint_run", "checkpoint:daily"):
        ...     run_checkpoint()
    """

    def __init__(
        self,
        config: EnterpriseAuditConfig | None = None,
        storages: list[AuditStorage] | None = None,
    ) -> None:
        """Initialize enterprise audit logger.

        Args:
            config: Audit configuration; a default one is created if omitted.
            storages: Storage backends. A non-empty list overrides the
                backends named in ``config``.
        """
        self._config = config or EnterpriseAuditConfig()
        self._storages = storages or self._create_storages_from_config()
        self._lock = threading.Lock()

        # Processors run in this order on every logged event.
        self._processors: list[AuditProcessor] = []
        if self._config.mask_sensitive_data:
            self._processors.append(
                PrivacyProcessor(self._config.sensitive_fields)
            )
        if self._config.require_checksum:
            self._processors.append(ChecksumProcessor())

    def _create_storages_from_config(self) -> list[AuditStorage]:
        """Create storage backends from configuration.

        Backends whose required setting (URL, bucket, bootstrap servers) is
        empty, and unrecognized backend names, are skipped. If nothing could
        be created, a single in-memory storage is used.
        """
        storages: list[AuditStorage] = []

        for backend in self._config.storage_backends:
            if backend == "memory":
                storages.append(MemoryAuditStorage())

            elif backend == "sqlite":
                db_path = self._config.storage_config.get(
                    "sqlite_path", "audit.db"
                )
                storages.append(SQLiteAuditStorage(db_path))

            elif backend == "elasticsearch" and self._config.elasticsearch_url:
                storages.append(
                    ElasticsearchAuditStorage(
                        self._config.elasticsearch_url,
                        index_prefix=self._config.elasticsearch_index_prefix,
                        username=self._config.elasticsearch_username,
                        password=self._config.elasticsearch_password,
                    )
                )

            elif backend == "s3" and self._config.s3_bucket:
                storages.append(
                    S3AuditStorage(
                        self._config.s3_bucket,
                        prefix=self._config.s3_prefix,
                        region=self._config.s3_region,
                    )
                )

            elif backend == "kafka" and self._config.kafka_bootstrap_servers:
                storages.append(
                    KafkaAuditStorage(
                        self._config.kafka_bootstrap_servers,
                        topic=self._config.kafka_topic,
                    )
                )

        return storages or [MemoryAuditStorage()]

    def log(self, event: AuditEvent) -> None:
        """Log an audit event to every storage backend.

        Does nothing when auditing is disabled in the configuration. A
        failure in one backend does not prevent writes to the others.

        Args:
            event: Audit event to log; its context is updated in place.
        """
        if not self._config.enabled:
            return

        # Add correlation context
        event.context.correlation_id = get_correlation_id() or ""
        event.context.service_name = self._config.service_name
        event.context.environment = self._config.environment

        # Each processor returns the event to hand to the next one.
        for processor in self._processors:
            event = processor.process(event)

        for storage in self._storages:
            try:
                storage.write(event)
            except Exception:
                # Best effort: one failing backend must not block the rest.
                pass

    def log_operation(
        self,
        operation: str,
        resource: str,
        *,
        outcome: str = "success",
        actor_id: str = "",
        details: dict[str, Any] | None = None,
        duration_ms: float | None = None,
    ) -> None:
        """Log an operation (convenience method).

        Args:
            operation: Operation name, stored as the event action.
            resource: Resource identifier. For "type:name" identifiers the
                part before the first ":" becomes the resource type;
                otherwise the type is "resource".
            outcome: Operation outcome; passed to ``AuditOutcome(...)``.
            actor_id: Actor identifier; no actor is attached when empty.
            details: Additional details.
            duration_ms: Operation duration; None is recorded as 0.
        """
        kind, separator, _ = resource.partition(":")
        resource_type = kind if separator else "resource"

        event = (
            AuditEventBuilder()
            .set_type(AuditEventType.CUSTOM)
            .set_action(operation)
            .set_outcome(AuditOutcome(outcome))
            .set_resource(id=resource, type=resource_type)
            .set_data(details or {})
            .set_duration(duration_ms or 0)
            .build()
        )

        if actor_id:
            event.actor = AuditActor(id=actor_id)

        self.log(event)

    def log_validation(
        self,
        dataset: str,
        *,
        success: bool,
        rows: int = 0,
        issues: int = 0,
        duration_ms: float = 0,
        validators: list[str] | None = None,
    ) -> None:
        """Log a validation operation.

        Args:
            dataset: Dataset name.
            success: Whether validation passed.
            rows: Rows validated.
            issues: Issues found.
            duration_ms: Duration in milliseconds.
            validators: List of validators run.
        """
        event_type = (
            AuditEventType.VALIDATION_COMPLETE
            if success
            else AuditEventType.VALIDATION_FAILED
        )

        event = (
            AuditEventBuilder()
            .set_type(event_type)
            .set_category(AuditCategory.VALIDATION)
            .set_action("validate")
            .set_outcome(AuditOutcome.SUCCESS if success else AuditOutcome.FAILURE)
            .set_resource(id=f"dataset:{dataset}", type="dataset", name=dataset)
            .set_data({
                "rows": rows,
                "issues": issues,
                "validators": validators or [],
            })
            .set_duration(duration_ms)
            .build()
        )

        self.log(event)

    def log_checkpoint(
        self,
        checkpoint: str,
        *,
        success: bool,
        validators_run: int = 0,
        issues: int = 0,
        duration_ms: float = 0,
    ) -> None:
        """Log a checkpoint execution.

        Args:
            checkpoint: Checkpoint name.
            success: Whether checkpoint passed.
            validators_run: Number of validators run.
            issues: Total issues found.
            duration_ms: Duration in milliseconds.
        """
        event = (
            AuditEventBuilder()
            .set_type(AuditEventType.CHECKPOINT_RUN)
            .set_category(AuditCategory.VALIDATION)
            .set_action("checkpoint_run")
            .set_outcome(AuditOutcome.SUCCESS if success else AuditOutcome.FAILURE)
            .set_resource(id=f"checkpoint:{checkpoint}", type="checkpoint", name=checkpoint)
            .set_data({
                "validators_run": validators_run,
                "issues": issues,
            })
            .set_duration(duration_ms)
            .build()
        )

        self.log(event)

    @contextmanager
    def operation_context(
        self,
        operation: str,
        resource: str,
        **kwargs: Any,
    ) -> Iterator[None]:
        """Context manager for operation auditing.

        Logs one event when the block exits, with its wall-clock duration.
        Any exception (including ``KeyboardInterrupt``) marks the operation
        as a failure and is re-raised unchanged.

        Args:
            operation: Operation name.
            resource: Resource identifier.
            **kwargs: Additional event data. On failure an "error" entry is
                added alongside it.
        """
        start_time = time.time()
        error_msg: str | None = None

        try:
            yield
        except BaseException as exc:
            # BaseException so interrupts are not recorded as successes;
            # fall back to the type name when the message is empty.
            error_msg = str(exc) or type(exc).__name__
            raise
        finally:
            duration_ms = (time.time() - start_time) * 1000
            details = dict(kwargs)
            if error_msg is not None:
                details["error"] = error_msg
            self.log_operation(
                operation,
                resource,
                outcome="success" if error_msg is None else "failure",
                details=details,
                duration_ms=duration_ms,
            )

    def query(
        self,
        start_time: datetime | None = None,
        end_time: datetime | None = None,
        event_types: list[AuditEventType] | None = None,
        actor_id: str | None = None,
        limit: int = 100,
    ) -> list[AuditEvent]:
        """Query audit events.

        Backends are tried in order; the first non-empty result wins.
        Backends that raise or return nothing (for example a write-only
        backend) are skipped.

        Args:
            start_time: Start of time range.
            end_time: End of time range.
            event_types: Filter by event types.
            actor_id: Filter by actor.
            limit: Maximum results.

        Returns:
            List of matching events, or an empty list if no backend has any.
        """
        for storage in self._storages:
            try:
                found = storage.query(
                    start_time=start_time,
                    end_time=end_time,
                    event_types=event_types,
                    actor_id=actor_id,
                    limit=limit,
                )
            except Exception:
                continue
            if found:
                return found
        return []

    def get_compliance_reporter(self) -> ComplianceReporter:
        """Get compliance reporter.

        Returns:
            ComplianceReporter bound to the first storage exposing ``query``,
            or to a fresh in-memory storage if there is none.
        """
        # NOTE(review): hasattr() also accepts the Kafka backend, whose
        # query() always returns []; confirm whether it should be skipped.
        for storage in self._storages:
            if hasattr(storage, "query"):
                return ComplianceReporter(storage)
        return ComplianceReporter(MemoryAuditStorage())

    def flush(self) -> None:
        """Flush all storage backends, ignoring individual failures."""
        for storage in self._storages:
            try:
                storage.flush()
            except Exception:
                pass

    def close(self) -> None:
        """Close all storage backends, ignoring individual failures."""
        for storage in self._storages:
            try:
                storage.close()
            except Exception:
                pass
1450
+
1451
+
1452
+ # =============================================================================
1453
+ # Global Audit Logger
1454
+ # =============================================================================
1455
+
1456
# Process-wide logger instance; None until configure_audit() or
# get_audit_logger() assigns it.
_global_logger: EnterpriseAuditLogger | None = None
# Held by configure_audit() and get_audit_logger() while they read or
# replace _global_logger.
_lock = threading.Lock()
1458
+
1459
+
1460
def configure_audit(
    *,
    service_name: str = "",
    environment: str = "",
    storage_backends: list[str] | None = None,
    elasticsearch_url: str = "",
    s3_bucket: str = "",
    kafka_bootstrap_servers: str = "",
    **kwargs: Any,
) -> EnterpriseAuditLogger:
    """Configure global audit logger.

    The replacement logger is built before the current one is touched, so a
    failure while building it (for example an unexpected keyword argument)
    leaves the existing global logger installed and open.

    Args:
        service_name: Service name.
        environment: Environment name.
        storage_backends: Storage backends to use; defaults to ["memory"].
        elasticsearch_url: Elasticsearch URL.
        s3_bucket: S3 bucket name.
        kafka_bootstrap_servers: Kafka bootstrap servers.
        **kwargs: Additional EnterpriseAuditConfig parameters.

    Returns:
        Configured EnterpriseAuditLogger.
    """
    global _global_logger

    config = EnterpriseAuditConfig(
        service_name=service_name,
        environment=environment,
        storage_backends=storage_backends or ["memory"],
        elasticsearch_url=elasticsearch_url,
        s3_bucket=s3_bucket,
        kafka_bootstrap_servers=kafka_bootstrap_servers,
        **kwargs,
    )
    replacement = EnterpriseAuditLogger(config)

    with _lock:
        previous, _global_logger = _global_logger, replacement

    # Close outside the lock so get_audit_logger() callers are not blocked
    # while the old backends flush and shut down.
    if previous is not None:
        previous.close()

    return replacement
1502
+
1503
+
1504
def get_audit_logger() -> EnterpriseAuditLogger:
    """Return the global audit logger, creating a default one on first use.

    Returns:
        EnterpriseAuditLogger instance.
    """
    global _global_logger

    with _lock:
        logger = _global_logger
        if logger is None:
            # First access: build a logger with the default configuration.
            logger = _global_logger = EnterpriseAuditLogger()
    return logger