truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877) hide show
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,1247 @@
1
+ """Comprehensive SQL security module.
2
+
3
+ Provides extensible SQL injection protection with:
4
+ - Multi-level security policies
5
+ - Parameterized query support
6
+ - Whitelist-based validation
7
+ - Query audit logging
8
+ - Pluggable security rules
9
+
10
+ Security Levels:
11
+ STRICT: Maximum security, minimal allowed operations
12
+ STANDARD: Balanced security for typical use cases (default)
13
+ PERMISSIVE: Relaxed security for trusted environments
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import hashlib
19
+ import logging
20
+ import re
21
+ from abc import ABC, abstractmethod
22
+ from dataclasses import dataclass, field
23
+ from datetime import datetime
24
+ from enum import Enum, auto
25
+ from typing import Any, Callable, Iterator
26
+
27
+ import polars as pl
28
+
29
+
30
+ # =============================================================================
31
+ # Exceptions
32
+ # =============================================================================
33
+
34
+
35
class SQLSecurityError(Exception):
    """Root of the SQL security exception hierarchy.

    Catch this type to handle every SQL security failure raised by this
    module with a single ``except`` clause.
    """
39
+
40
+
41
class SQLInjectionError(SQLSecurityError):
    """Signals that a query looks like a SQL injection attempt.

    Attributes:
        pattern: Identifier or regex of the rule that fired, if supplied.
        query: The offending query; anything longer than 100 characters is
            cut to its first 100 characters followed by ``"..."``.
    """

    def __init__(self, message: str, pattern: str | None = None, query: str | None = None):
        super().__init__(message)
        self.pattern = pattern

        # Store only a short preview of long queries on the exception.
        if query and len(query) > 100:
            preview = query[:100] + "..."
        else:
            preview = query
        self.query = preview
48
+
49
+
50
class QueryValidationError(SQLSecurityError):
    """Signals that a query was rejected by a validation check."""
54
+
55
+
56
+ # =============================================================================
57
+ # Security Levels and Policies
58
+ # =============================================================================
59
+
60
+
61
class SecurityLevel(Enum):
    """Named presets describing how restrictive a policy is."""

    # Maximum security
    STRICT = auto()
    # Balanced (default)
    STANDARD = auto()
    # Relaxed for trusted environments
    PERMISSIVE = auto()
67
+
68
+
69
@dataclass
class SecurityPolicy:
    """Declarative description of what a SQL query is permitted to contain.

    A policy bundles size limits, structural permissions, the statement
    types that may start a query, extra blocked regexes/functions, optional
    table/column whitelists and a violation callback.

    Example:
        # Custom policy for analytics queries
        policy = SecurityPolicy(
            level=SecurityLevel.STANDARD,
            max_query_length=20000,
            allow_joins=True,
            allow_subqueries=True,
            allow_aggregations=True,
            blocked_functions=["SLEEP", "BENCHMARK", "LOAD_FILE"],
        )
    """

    # --- General limits -----------------------------------------------------
    level: SecurityLevel = SecurityLevel.STANDARD
    max_query_length: int = 10000
    max_identifier_length: int = 128

    # --- Structural permissions ---------------------------------------------
    allow_joins: bool = True
    allow_subqueries: bool = True
    allow_aggregations: bool = True
    allow_window_functions: bool = True
    # Common Table Expressions (WITH clause)
    allow_cte: bool = True
    # Off by default: UNION can be used for injection
    allow_union: bool = False

    # --- Statement keywords a query may start with --------------------------
    allowed_statements: set[str] = field(default_factory=lambda: {"SELECT", "WITH"})

    # --- Additional blocked regex patterns ----------------------------------
    blocked_patterns: list[str] = field(default_factory=list)

    # --- Blocked SQL functions ----------------------------------------------
    blocked_functions: list[str] = field(
        default_factory=lambda: [
            "SLEEP",
            "BENCHMARK",
            "LOAD_FILE",
            "INTO OUTFILE",
            "INTO DUMPFILE",
        ]
    )

    # --- Whitelists (an empty set means "everything is allowed") ------------
    allowed_tables: set[str] = field(default_factory=set)
    allowed_columns: set[str] = field(default_factory=set)

    # --- Optional violation callback ----------------------------------------
    on_violation: Callable[[str, str], None] | None = None

    @classmethod
    def strict(cls) -> "SecurityPolicy":
        """Build the STRICT preset: short queries, no joins/subqueries/UNION/CTE."""
        overrides = {
            "level": SecurityLevel.STRICT,
            "max_query_length": 5000,
            "allow_joins": False,
            "allow_subqueries": False,
            "allow_union": False,
            "allow_cte": False,
        }
        return cls(**overrides)

    @classmethod
    def standard(cls) -> "SecurityPolicy":
        """Build the STANDARD preset, i.e. the field defaults."""
        return cls(level=SecurityLevel.STANDARD)

    @classmethod
    def permissive(cls) -> "SecurityPolicy":
        """Build the PERMISSIVE preset: long queries, UNION allowed."""
        overrides = {
            "level": SecurityLevel.PERMISSIVE,
            "max_query_length": 50000,
            "allow_joins": True,
            "allow_subqueries": True,
            "allow_union": True,
            "allow_cte": True,
        }
        return cls(**overrides)
154
+
155
+
156
+ # =============================================================================
157
+ # Pattern-based Validation
158
+ # =============================================================================
159
+
160
+
161
@dataclass
class DangerousPattern:
    """One named regular-expression rule describing risky SQL.

    Attributes:
        name: Identifier used to look the rule up (e.g. when unregistering).
        pattern: Regular-expression source text.
        severity: One of "HIGH", "MEDIUM" or "LOW"; defaults to "HIGH".
        description: Human-readable explanation; defaults to an empty string.
    """

    name: str
    pattern: str
    # HIGH, MEDIUM, LOW
    severity: str = "HIGH"
    description: str = ""
169
+
170
+
171
class PatternRegistry:
    r"""Ordered, extensible collection of dangerous SQL patterns.

    A new registry starts with ``DEFAULT_PATTERNS``; more rules can be added
    with ``register`` and removed by name with ``unregister``.

    Example:
        registry = PatternRegistry()
        registry.register(DangerousPattern(
            name="time_based_injection",
            pattern=r"WAITFOR\s+DELAY",
            severity="HIGH",
            description="Time-based SQL injection"
        ))
    """

    # Rules installed into every new registry, in evaluation order.
    DEFAULT_PATTERNS = [
        # --- DDL statements ---
        DangerousPattern(
            name="ddl_create",
            pattern=r"\b(CREATE)\s+(TABLE|DATABASE|INDEX|VIEW|SCHEMA|PROCEDURE|FUNCTION)\b",
            severity="HIGH",
            description="DDL CREATE statement",
        ),
        DangerousPattern(
            name="ddl_alter",
            pattern=r"\b(ALTER)\s+(TABLE|DATABASE|INDEX|VIEW|SCHEMA)\b",
            severity="HIGH",
            description="DDL ALTER statement",
        ),
        DangerousPattern(
            name="ddl_drop",
            pattern=r"\b(DROP)\s+(TABLE|DATABASE|INDEX|VIEW|SCHEMA)\b",
            severity="HIGH",
            description="DDL DROP statement",
        ),
        DangerousPattern(
            name="ddl_truncate",
            pattern=r"\bTRUNCATE\s+TABLE\b",
            severity="HIGH",
            description="DDL TRUNCATE statement",
        ),
        # --- DCL statements ---
        DangerousPattern(
            name="dcl_grant",
            pattern=r"\b(GRANT|REVOKE|DENY)\b",
            severity="HIGH",
            description="DCL statement",
        ),
        # --- DML modification ---
        DangerousPattern(
            name="dml_insert",
            pattern=r"\bINSERT\s+INTO\b",
            severity="HIGH",
            description="INSERT statement",
        ),
        DangerousPattern(
            name="dml_update",
            pattern=r"\bUPDATE\s+\w+\s+SET\b",
            severity="HIGH",
            description="UPDATE statement",
        ),
        DangerousPattern(
            name="dml_delete",
            pattern=r"\bDELETE\s+FROM\b",
            severity="HIGH",
            description="DELETE statement",
        ),
        # --- Transaction control ---
        DangerousPattern(
            name="transaction",
            pattern=r"\b(COMMIT|ROLLBACK|SAVEPOINT|BEGIN\s+TRANSACTION)\b",
            severity="MEDIUM",
            description="Transaction control",
        ),
        # --- System/Exec ---
        DangerousPattern(
            name="exec",
            pattern=r"\b(EXEC|EXECUTE|CALL)\s*\(",
            severity="HIGH",
            description="Execute/call statement",
        ),
        # --- File operations ---
        DangerousPattern(
            name="file_ops",
            pattern=r"\b(LOAD_FILE|INTO\s+OUTFILE|INTO\s+DUMPFILE)\b",
            severity="HIGH",
            description="File operation",
        ),
        # --- Comment injection ---
        DangerousPattern(
            name="line_comment",
            pattern=r"--\s*$",
            severity="MEDIUM",
            description="Line comment at end (potential injection)",
        ),
        DangerousPattern(
            name="block_comment",
            pattern=r"/\*[^*]*\*+(?:[^/*][^*]*\*+)*/",
            severity="LOW",
            description="Block comment",
        ),
        # --- Stacked queries ---
        DangerousPattern(
            name="stacked_query",
            pattern=r";\s*(SELECT|INSERT|UPDATE|DELETE|DROP|CREATE|ALTER|EXEC)",
            severity="HIGH",
            description="Stacked query",
        ),
        # --- Union injection ---
        DangerousPattern(
            name="union_select",
            pattern=r"\bUNION\s+(ALL\s+)?SELECT\b",
            severity="MEDIUM",
            description="UNION SELECT (potential injection)",
        ),
        # --- Time-based injection ---
        DangerousPattern(
            name="sleep",
            pattern=r"\b(SLEEP|WAITFOR\s+DELAY|BENCHMARK)\s*\(",
            severity="HIGH",
            description="Time-based injection",
        ),
        # --- Error-based injection ---
        DangerousPattern(
            name="extractvalue",
            pattern=r"\b(EXTRACTVALUE|UPDATEXML|EXP|FLOOR\s*\(\s*RAND)\b",
            severity="MEDIUM",
            description="Error-based injection function",
        ),
        # --- Boolean-based injection ---
        DangerousPattern(
            name="always_true",
            pattern=r"(?:OR|AND)\s+['\"0-9]+\s*=\s*['\"0-9]+",
            severity="MEDIUM",
            description="Always true/false condition",
        ),
        DangerousPattern(
            name="or_1_eq_1",
            pattern=r"\bOR\s+1\s*=\s*1\b",
            severity="HIGH",
            description="Classic OR 1=1 injection",
        ),
    ]

    def __init__(self) -> None:
        # Two parallel lists kept index-aligned: the rule objects and their
        # compiled regexes.
        self._patterns: list[DangerousPattern] = []
        self._compiled: list[tuple[DangerousPattern, re.Pattern]] = []

        for default_rule in self.DEFAULT_PATTERNS:
            self.register(default_rule)

    def register(self, pattern: DangerousPattern) -> None:
        """Append a rule and its compiled regex to the registry.

        The regex is compiled case-insensitively and in multi-line mode.
        """
        flags = re.IGNORECASE | re.MULTILINE
        regex = re.compile(pattern.pattern, flags)
        self._patterns.append(pattern)
        self._compiled.append((pattern, regex))

    def unregister(self, name: str) -> bool:
        """Remove the first rule called *name*.

        Returns:
            True if a rule was removed, False if no rule has that name.
        """
        position = next(
            (idx for idx, rule in enumerate(self._patterns) if rule.name == name),
            None,
        )
        if position is None:
            return False

        # Remove from both lists so they stay index-aligned.
        del self._patterns[position]
        del self._compiled[position]
        return True

    def check(self, query: str) -> list[tuple[DangerousPattern, str]]:
        """Run every registered rule against *query*.

        Returns:
            List of (pattern, matched_text) tuples in registration order,
            holding the first match of each rule that matched.
        """
        searched = ((rule, regex.search(query)) for rule, regex in self._compiled)
        return [(rule, hit.group()) for rule, hit in searched if hit]

    def __iter__(self) -> Iterator[DangerousPattern]:
        """Iterate over the registered rules in registration order."""
        return iter(self._patterns)
354
+
355
+
356
+ # =============================================================================
357
+ # Core SQL Validator
358
+ # =============================================================================
359
+
360
+
361
class SQLQueryValidator:
    """Enhanced SQL query validator with pluggable policies.

    Validates SQL queries for security issues using a configurable
    ``SecurityPolicy`` and a ``PatternRegistry`` of dangerous patterns.

    Example:
        # With default policy
        validator = SQLQueryValidator()
        validator.validate("SELECT * FROM users")  # OK

        # With custom policy
        policy = SecurityPolicy.strict()
        validator = SQLQueryValidator(policy=policy)
        validator.validate("SELECT * FROM users JOIN orders")  # Raises error
    """

    def __init__(
        self,
        policy: SecurityPolicy | None = None,
        pattern_registry: PatternRegistry | None = None,
        audit_logger: "QueryAuditLogger | None" = None,
    ):
        """Initialize SQL query validator.

        Args:
            policy: Security policy to use (default: STANDARD)
            pattern_registry: Custom pattern registry. Policy-derived
                patterns are added to this registry.
            audit_logger: Optional audit logger
        """
        self.policy = policy or SecurityPolicy.standard()
        self.pattern_registry = pattern_registry or PatternRegistry()
        self.audit_logger = audit_logger

        # Apply policy-specific patterns
        self._apply_policy_patterns()

    def _register_policy_pattern(self, pattern: DangerousPattern) -> None:
        """Register *pattern* unless an identical rule is already present.

        A registry may be shared by several validators; without this check
        each new validator would append another copy of the same
        policy-derived rules and the registry would grow without bound.
        Rules are only skipped when both name and regex are equal, so a
        same-named rule with a different regex is still registered.
        """
        for existing in self.pattern_registry:
            if existing.name == pattern.name and existing.pattern == pattern.pattern:
                return
        self.pattern_registry.register(pattern)

    def _apply_policy_patterns(self) -> None:
        """Apply additional patterns based on policy."""
        # Block UNION if not allowed
        if not self.policy.allow_union:
            self._register_policy_pattern(
                DangerousPattern(
                    "policy_union",
                    r"\bUNION\b",
                    "MEDIUM",
                    "UNION blocked by policy",
                )
            )

        # Block subqueries if not allowed
        if not self.policy.allow_subqueries:
            self._register_policy_pattern(
                DangerousPattern(
                    "policy_subquery",
                    r"\(\s*SELECT\b",
                    "MEDIUM",
                    "Subquery blocked by policy",
                )
            )

        # Block joins if not allowed
        if not self.policy.allow_joins:
            self._register_policy_pattern(
                DangerousPattern(
                    "policy_join",
                    r"\b(INNER|LEFT|RIGHT|FULL|CROSS)?\s*JOIN\b",
                    "MEDIUM",
                    "JOIN blocked by policy",
                )
            )

        # Add custom blocked patterns
        for i, pattern in enumerate(self.policy.blocked_patterns):
            self._register_policy_pattern(
                DangerousPattern(
                    f"custom_blocked_{i}",
                    pattern,
                    "HIGH",
                    "Custom blocked pattern",
                )
            )

        # Add blocked functions (matched as a call: name followed by "(")
        for func in self.policy.blocked_functions:
            self._register_policy_pattern(
                DangerousPattern(
                    f"blocked_func_{func.lower()}",
                    rf"\b{re.escape(func)}\s*\(",
                    "HIGH",
                    f"Blocked function: {func}",
                )
            )

    def validate(self, query: str) -> None:
        """Validate a SQL query for security issues.

        Checks run in this order: empty query, length limit, statement type,
        multiple statements, dangerous patterns, table whitelist. A
        successful validation is reported to the audit logger if one is set.

        Args:
            query: SQL query string to validate

        Raises:
            QueryValidationError: If query fails basic validation
            SQLInjectionError: If potential injection is detected
        """
        if not query or not query.strip():
            raise QueryValidationError("Empty query")

        # Check length (measured on the raw, unstripped query)
        if len(query) > self.policy.max_query_length:
            raise QueryValidationError(
                f"Query exceeds maximum length of {self.policy.max_query_length}"
            )

        normalized = query.strip()

        # Check statement type
        self._validate_statement_type(normalized)

        # Check for multiple statements
        self._check_multiple_statements(normalized)

        # Check against pattern registry; only the first hit is reported
        matches = self.pattern_registry.check(normalized)
        if matches:
            pattern, matched = matches[0]
            if self.policy.on_violation:
                self.policy.on_violation(pattern.name, matched)
            raise SQLInjectionError(
                f"Dangerous pattern detected: {pattern.description}",
                pattern=pattern.pattern,
                query=query,
            )

        # Validate table names (an empty whitelist allows every table)
        if self.policy.allowed_tables:
            self._validate_table_names(normalized)

        # Log successful validation
        if self.audit_logger:
            self.audit_logger.log_query(query, success=True)

    def _validate_statement_type(self, query: str) -> None:
        """Validate that the query's leading keyword is an allowed statement.

        Raises:
            QueryValidationError: If no leading word is found or it is not
                in ``policy.allowed_statements``.
        """
        match = re.match(r"^\s*(\w+)", query, re.IGNORECASE)
        if not match:
            raise QueryValidationError("Could not determine SQL statement type")

        statement_type = match.group(1).upper()
        if statement_type not in self.policy.allowed_statements:
            # Sorted so the message is deterministic (sets are unordered).
            raise QueryValidationError(
                f"Statement type '{statement_type}' not allowed. "
                f"Allowed: {', '.join(sorted(self.policy.allowed_statements))}"
            )

    def _check_multiple_statements(self, query: str) -> None:
        """Reject queries with anything after a ``;`` outside string literals.

        Raises:
            SQLInjectionError: If a semicolon is followed by non-whitespace.
        """
        # Remove string literals so semicolons inside them are ignored
        cleaned = re.sub(r"'[^']*'", "", query)
        cleaned = re.sub(r'"[^"]*"', "", cleaned)

        if re.search(r";\s*\S", cleaned):
            raise SQLInjectionError(
                "Multiple statements detected",
                pattern="stacked_query",
                query=query,
            )

    def _validate_table_names(self, query: str) -> None:
        """Validate table names following FROM/JOIN against the whitelist.

        Bare identifiers as well as double-quoted, backtick-quoted and
        bracket-quoted identifiers are recognised, so quoting a table name
        cannot be used to skip the whitelist. Comparison is case-insensitive.

        Raises:
            QueryValidationError: If a referenced table is not allowed.
        """
        table_pattern = (
            r'\b(?:FROM|JOIN)\s+'
            r'("[^"]+"|`[^`]+`|\[[^\]]+\]|[a-zA-Z_][a-zA-Z0-9_]*)'
        )
        matches = re.findall(table_pattern, query, re.IGNORECASE)

        allowed_lower = {t.lower() for t in self.policy.allowed_tables}
        for raw in matches:
            # Drop surrounding identifier quotes; no-op for bare identifiers.
            table = raw.strip('"`[]')
            if table.lower() not in allowed_lower:
                # Sorted so the message is deterministic (sets are unordered).
                raise QueryValidationError(
                    f"Table '{table}' not in allowed list: "
                    f"{', '.join(sorted(self.policy.allowed_tables))}"
                )
540
+
541
+
542
def validate_sql_query(
    query: str,
    policy: SecurityPolicy | None = None,
    allowed_tables: list[str] | None = None,
) -> None:
    """Convenience function to validate SQL query.

    Builds a ``SQLQueryValidator`` for the given (or standard) policy and
    runs it on ``query``. When ``allowed_tables`` is supplied it overrides
    the policy's table whitelist for this call only; the caller's policy
    object is left untouched.

    Args:
        query: SQL query to validate
        policy: Optional security policy (defaults to
            ``SecurityPolicy.standard()``)
        allowed_tables: Optional table whitelist

    Raises:
        SQLSecurityError: If validation fails
    """
    import copy

    if policy is None:
        policy = SecurityPolicy.standard()

    if allowed_tables:
        # Work on a shallow copy so a policy shared by the caller (or reused
        # across calls) does not silently keep this call's whitelist.
        policy = copy.copy(policy)
        policy.allowed_tables = set(allowed_tables)

    validator = SQLQueryValidator(policy=policy)
    validator.validate(query)
565
+
566
+
567
+ # =============================================================================
568
+ # Whitelist Validation
569
+ # =============================================================================
570
+
571
+
572
@dataclass
class SchemaWhitelist:
    """Schema-aware whitelist for tables and columns.

    Table and column names are stored lower-cased, so every lookup is
    case-insensitive. A table registered without columns places no
    restriction on its columns, and ``allow_all_columns=True`` lifts the
    column restriction for every whitelisted table.

    Example:
        whitelist = SchemaWhitelist()
        whitelist.add_table("orders", ["id", "customer_id", "amount", "status"])
        whitelist.add_table("customers", ["id", "name", "email"])

        whitelist.validate_table("orders")  # OK
        whitelist.validate_column("orders", "amount")  # OK
        whitelist.validate_column("orders", "password")  # Raises error
    """

    # Lower-cased table name -> lower-cased allowed column names.
    # An empty set means every column of that table is allowed.
    tables: dict[str, set[str]] = field(default_factory=dict)
    # When True, column checks are skipped for all whitelisted tables.
    allow_all_columns: bool = False

    def add_table(self, table: str, columns: list[str] | None = None) -> None:
        """Add a table to the whitelist (replacing any previous entry).

        Args:
            table: Table name
            columns: Allowed columns (None or empty = all columns allowed)
        """
        self.tables[table.lower()] = {c.lower() for c in columns} if columns else set()

    def remove_table(self, table: str) -> None:
        """Remove a table from the whitelist; unknown tables are ignored."""
        self.tables.pop(table.lower(), None)

    def validate_table(self, table: str) -> None:
        """Validate table is in whitelist.

        Raises:
            QueryValidationError: If the table has not been whitelisted.
        """
        if table.lower() not in self.tables:
            raise QueryValidationError(
                f"Table '{table}' not in whitelist. "
                f"Allowed: {', '.join(self.tables.keys())}"
            )

    def validate_column(self, table: str, column: str) -> None:
        """Validate column is in whitelist for table.

        Raises:
            QueryValidationError: If the table is not whitelisted, or the
                table restricts its columns and ``column`` is not one of them.
        """
        self.validate_table(table)

        # The global switch overrides any per-table column restriction.
        if self.allow_all_columns:
            return

        columns = self.tables[table.lower()]
        if columns and column.lower() not in columns:
            # Sorted so the message is stable across runs (sets are unordered).
            raise QueryValidationError(
                f"Column '{column}' not allowed for table '{table}'. "
                f"Allowed: {', '.join(sorted(columns))}"
            )

    def get_tables(self) -> list[str]:
        """Get list of allowed tables, in registration order."""
        return list(self.tables.keys())

    def get_columns(self, table: str) -> list[str]:
        """Get sorted list of allowed columns for table (empty if unrestricted or unknown)."""
        return sorted(self.tables.get(table.lower(), ()))
628
+
629
+
630
class WhitelistValidator:
    """Validates queries against schema whitelist.

    Table references are found with a regex heuristic (not a SQL parser):
    the table after each ``FROM``/``JOIN`` plus any further comma-separated
    tables in the same list. Columns of a simple ``SELECT ... FROM`` list are
    checked against the first referenced table.

    Example:
        whitelist = SchemaWhitelist()
        whitelist.add_table("orders", ["id", "amount"])

        validator = WhitelistValidator(whitelist)
        validator.validate_query("SELECT id, amount FROM orders")  # OK
        validator.validate_query("SELECT password FROM users")  # Raises error
    """

    _IDENTIFIER = r"[a-zA-Z_][a-zA-Z0-9_]*"
    # One table with an optional alias. FROM/JOIN are never accepted as an
    # alias, otherwise the keyword of the next clause would be swallowed and
    # the table that follows it would escape validation.
    _TABLE_REF = rf"{_IDENTIFIER}(?:\s+(?:AS\s+)?(?!(?:FROM|JOIN)\b){_IDENTIFIER})?"
    _TABLE_LIST = re.compile(
        rf"\b(?:FROM|JOIN)\s+({_TABLE_REF}(?:\s*,\s*{_TABLE_REF})*)",
        re.IGNORECASE,
    )

    def __init__(self, schema: SchemaWhitelist):
        self.schema = schema

    def _extract_tables(self, query: str) -> list[str]:
        """Return every table named in a FROM/JOIN list, in query order."""
        tables: list[str] = []
        for reference_list in self._TABLE_LIST.findall(query):
            # The first word of each comma-separated item is the table name.
            tables.extend(item.split()[0] for item in reference_list.split(","))
        return tables

    def validate_query(self, query: str) -> None:
        """Validate query against whitelist.

        Args:
            query: SQL text to check.

        Raises:
            QueryValidationError: Propagated from the schema when a table or
                column is not whitelisted.
        """
        # Extract table references, including comma-separated ones such as
        # "FROM orders, users", so no listed table skips the whitelist.
        tables = self._extract_tables(query)

        for table in tables:
            self.schema.validate_table(table)

        # Extract column references (simplified)
        # Note: Full SQL parsing would require a proper parser
        select_pattern = r"SELECT\s+(.+?)\s+FROM"
        match = re.search(select_pattern, query, re.IGNORECASE | re.DOTALL)
        if match and tables:
            columns_str = match.group(1)
            if columns_str.strip() != "*":
                # Parse column list, dropping any "table." qualifier
                columns = [c.strip().split(".")[-1] for c in columns_str.split(",")]
                for col in columns:
                    # Remove aliases
                    col = re.sub(r"\s+AS\s+\w+$", "", col, flags=re.IGNORECASE).strip()
                    if col and not col.startswith("("):
                        # Validate against first table (simplified)
                        self.schema.validate_column(tables[0], col)
669
+
670
+
671
+ # =============================================================================
672
+ # Parameterized Queries
673
+ # =============================================================================
674
+
675
+
676
@dataclass
class ParameterizedQuery:
    """A parameterized SQL query.

    Stores query template and parameters separately for safe execution.
    Placeholders are written ``:name``; a colon that directly follows another
    colon (the ``::`` cast operator) is not treated as a placeholder.

    Example:
        query = ParameterizedQuery(
            template="SELECT * FROM orders WHERE amount > :min_amount",
            parameters={"min_amount": 100}
        )
    """

    template: str
    parameters: dict[str, Any] = field(default_factory=dict)

    # ":name" placeholder; the lookbehind skips the second colon of "::".
    # Unannotated on purpose so the dataclass does not treat it as a field.
    _PLACEHOLDER = re.compile(r"(?<!:):(\w+)")

    def __post_init__(self) -> None:
        """Validate template and parameters.

        Raises:
            QueryValidationError: If the template uses a placeholder for
                which no parameter was supplied.
        """
        placeholders = set(self._PLACEHOLDER.findall(self.template))

        missing = placeholders - set(self.parameters)
        if missing:
            raise QueryValidationError(
                f"Missing parameters: {', '.join(sorted(missing))}"
            )

    def render(self) -> str:
        """Render the query with parameters.

        Note: For Polars SQL, parameters are substituted directly.
        Values are escaped to prevent injection.

        Substitution is a single pass over the template: each placeholder is
        matched as a whole name (``:id`` never rewrites part of ``:id2``) and
        text that came from a parameter value is never scanned again.

        Returns:
            The SQL text with every known placeholder replaced by its
            escaped value.
        """

        def substitute(match: re.Match[str]) -> str:
            name = match.group(1)
            if name not in self.parameters:
                # Unknown placeholder (parameters changed after construction):
                # leave the text as written.
                return match.group(0)
            return self._escape_value(self.parameters[name])

        return self._PLACEHOLDER.sub(substitute, self.template)

    def _escape_value(self, value: Any) -> str:
        """Escape a parameter value for SQL.

        Args:
            value: None, bool, int, float, str, or a list/tuple of those.

        Returns:
            The SQL literal for ``value``.

        Raises:
            QueryValidationError: For unsupported types and for NaN/infinite
                floats, which have no SQL literal form.
        """
        if value is None:
            return "NULL"
        # bool must be tested before int: bool is a subclass of int.
        if isinstance(value, bool):
            return "TRUE" if value else "FALSE"
        if isinstance(value, float) and (value != value or abs(value) == float("inf")):
            # str() would give the bare words nan/inf, which SQL would read
            # as identifiers rather than numbers.
            raise QueryValidationError(
                f"Unsupported parameter value: {value!r}"
            )
        if isinstance(value, (int, float)):
            return str(value)
        if isinstance(value, str):
            # Escape single quotes by doubling them
            escaped = value.replace("'", "''")
            return f"'{escaped}'"
        if isinstance(value, (list, tuple)):
            escaped_items = [self._escape_value(v) for v in value]
            return f"({', '.join(escaped_items)})"
        raise QueryValidationError(
            f"Unsupported parameter type: {type(value)}"
        )
736
+
737
+
738
class SecureSQLBuilder:
    """Builder for secure SQL queries with parameterization.

    Provides a fluent interface for building secure SQL queries
    with automatic parameter escaping and validation.

    Table and column names are checked as plain identifiers when they are
    added. Free-form fragments (``where``, ``having`` and the ``on`` condition
    of ``join``) are stored as given; ``execute`` validates the fully
    rendered query before running it.

    Example:
        builder = SecureSQLBuilder(allowed_tables=["orders", "customers"])

        query = (
            builder
            .select("orders", ["id", "amount", "status"])
            .where("amount > :min_amount")
            .where("status = :status")
            .order_by("amount", desc=True)
            .limit(100)
            .build({"min_amount": 100, "status": "pending"})
        )

        # Execute with context
        result = builder.execute(ctx, query)
    """

    def __init__(
        self,
        allowed_tables: list[str] | None = None,
        policy: SecurityPolicy | None = None,
    ):
        """Create a builder.

        Args:
            allowed_tables: Optional table whitelist (None = no restriction)
            policy: Security policy (defaults to ``SecurityPolicy.standard()``)
        """
        self.allowed_tables = set(allowed_tables) if allowed_tables else None
        self.policy = policy or SecurityPolicy.standard()
        self.validator = SQLQueryValidator(policy=self.policy)

        # Query parts
        self._select_table: str | None = None
        self._select_columns: list[str] = []
        self._joins: list[str] = []
        self._where_clauses: list[str] = []
        self._group_by: list[str] = []
        self._having_clauses: list[str] = []
        self._order_by: list[str] = []
        self._limit_value: int | None = None
        self._offset_value: int | None = None

    def select(
        self,
        table: str,
        columns: list[str] | None = None,
    ) -> "SecureSQLBuilder":
        """Set SELECT table and columns.

        Args:
            table: Table name
            columns: Columns to select (None = all)

        Raises:
            QueryValidationError: If the table or a column is not a valid
                identifier, or the table is not in the allowed list.
        """
        self._validate_identifier(table)
        if self.allowed_tables and table not in self.allowed_tables:
            raise QueryValidationError(
                f"Table '{table}' not in allowed list"
            )

        self._select_table = table

        if columns:
            for col in columns:
                self._validate_identifier(col)
            # Copy: later changes to the caller's list must not add
            # unvalidated names to the query.
            self._select_columns = list(columns)
        else:
            self._select_columns = ["*"]

        return self

    def join(
        self,
        table: str,
        on: str,
        join_type: str = "INNER",
    ) -> "SecureSQLBuilder":
        """Add a JOIN clause.

        Args:
            table: Table to join
            on: Join condition (stored as given)
            join_type: Type of join (INNER, LEFT, RIGHT, etc.)

        Raises:
            QueryValidationError: If joins are disabled by the policy, the
                table is invalid or not allowed, or the join type is unknown.
        """
        if not self.policy.allow_joins:
            raise QueryValidationError("JOINs not allowed by policy")

        self._validate_identifier(table)
        if self.allowed_tables and table not in self.allowed_tables:
            raise QueryValidationError(
                f"Table '{table}' not in allowed list"
            )

        join_type = join_type.upper()
        if join_type not in {"INNER", "LEFT", "RIGHT", "FULL", "CROSS"}:
            raise QueryValidationError(f"Invalid join type: {join_type}")

        self._joins.append(f"{join_type} JOIN {table} ON {on}")
        return self

    def where(self, condition: str) -> "SecureSQLBuilder":
        """Add a WHERE condition.

        Args:
            condition: WHERE condition (can include :param placeholders)
        """
        self._where_clauses.append(condition)
        return self

    def group_by(self, *columns: str) -> "SecureSQLBuilder":
        """Add GROUP BY columns (plain or ``table.column`` references)."""
        for col in columns:
            self._validate_column_reference(col)
        self._group_by.extend(columns)
        return self

    def having(self, condition: str) -> "SecureSQLBuilder":
        """Add HAVING condition."""
        self._having_clauses.append(condition)
        return self

    def order_by(self, column: str, desc: bool = False) -> "SecureSQLBuilder":
        """Add ORDER BY column (plain or ``table.column`` reference)."""
        self._validate_column_reference(column)
        direction = "DESC" if desc else "ASC"
        self._order_by.append(f"{column} {direction}")
        return self

    def limit(self, n: int) -> "SecureSQLBuilder":
        """Set LIMIT."""
        if n < 0:
            raise QueryValidationError("LIMIT must be non-negative")
        self._limit_value = n
        return self

    def offset(self, n: int) -> "SecureSQLBuilder":
        """Set OFFSET."""
        if n < 0:
            raise QueryValidationError("OFFSET must be non-negative")
        self._offset_value = n
        return self

    def build(self, parameters: dict[str, Any] | None = None) -> ParameterizedQuery:
        """Build the parameterized query.

        Args:
            parameters: Query parameters

        Returns:
            ParameterizedQuery ready for execution

        Raises:
            QueryValidationError: If ``select`` has not been called.
        """
        if not self._select_table:
            raise QueryValidationError("No table selected")

        parts = [
            f"SELECT {', '.join(self._select_columns)}",
            f"FROM {self._select_table}",
        ]

        # JOINs
        parts.extend(self._joins)

        # WHERE
        if self._where_clauses:
            conditions = " AND ".join(f"({c})" for c in self._where_clauses)
            parts.append(f"WHERE {conditions}")

        # GROUP BY
        if self._group_by:
            parts.append(f"GROUP BY {', '.join(self._group_by)}")

        # HAVING
        if self._having_clauses:
            conditions = " AND ".join(f"({c})" for c in self._having_clauses)
            parts.append(f"HAVING {conditions}")

        # ORDER BY
        if self._order_by:
            parts.append(f"ORDER BY {', '.join(self._order_by)}")

        # LIMIT
        if self._limit_value is not None:
            parts.append(f"LIMIT {self._limit_value}")

        # OFFSET
        if self._offset_value is not None:
            parts.append(f"OFFSET {self._offset_value}")

        template = " ".join(parts)

        return ParameterizedQuery(
            template=template,
            parameters=parameters or {},
        )

    def execute(
        self,
        ctx: pl.SQLContext,
        query: ParameterizedQuery,
    ) -> pl.DataFrame:
        """Execute a parameterized query.

        Args:
            ctx: Polars SQL context
            query: Parameterized query to execute

        Returns:
            Query result as DataFrame
        """
        rendered = query.render()

        # Validate the rendered query
        self.validator.validate(rendered)

        return ctx.execute(rendered).collect()

    def reset(self) -> "SecureSQLBuilder":
        """Reset builder state."""
        self._select_table = None
        self._select_columns = []
        self._joins = []
        self._where_clauses = []
        self._group_by = []
        self._having_clauses = []
        self._order_by = []
        self._limit_value = None
        self._offset_value = None
        return self

    def _validate_column_reference(self, reference: str) -> None:
        """Validate a plain or dot-qualified column reference.

        Every dot-separated part must be a valid identifier. Checking only
        the last part would let arbitrary SQL through in front of the final
        dot.
        """
        for part in reference.split("."):
            self._validate_identifier(part)

    def _validate_identifier(self, identifier: str) -> None:
        """Validate SQL identifier.

        Accepts ``*`` or a name made of letters, digits and underscores that
        does not start with a digit and does not exceed the policy's maximum
        identifier length.

        Raises:
            QueryValidationError: If the identifier is empty, too long, or
                contains other characters.
        """
        if not identifier:
            raise QueryValidationError("Empty identifier")

        if len(identifier) > self.policy.max_identifier_length:
            raise QueryValidationError(
                f"Identifier too long: {len(identifier)} > {self.policy.max_identifier_length}"
            )

        # fullmatch, not match with "$": "$" also matches just before a
        # trailing newline, which would let "name\n" through.
        if not re.fullmatch(r"[a-zA-Z_][a-zA-Z0-9_]*", identifier):
            if identifier != "*":
                raise QueryValidationError(
                    f"Invalid identifier '{identifier}': must be alphanumeric with underscores"
                )
987
+
988
+
989
+ # =============================================================================
990
+ # Secure Query Mixin
991
+ # =============================================================================
992
+
993
+
994
class SecureQueryMixin:
    """Mixin that gives validators a guarded way to build and run SQL.

    Queries are assembled through ``SecureSQLBuilder`` and checked by a
    ``SQLQueryValidator`` before they are executed against a Polars
    ``SQLContext``.

    Example:
        class MyValidator(BaseValidator, SecureQueryMixin):
            def validate(self, lf):
                query = self.build_secure_query(
                    table="data",
                    columns=["id", "value"],
                    where="value > :threshold",
                    parameters={"threshold": 100},
                )
                result = self.execute_secure_query(lf, query)
                return self.process_result(result)
    """

    # Class-level defaults; set_security_policy() overrides them per instance.
    _security_policy: SecurityPolicy = SecurityPolicy.standard()
    _sql_validator: SQLQueryValidator | None = None

    def set_security_policy(self, policy: SecurityPolicy) -> None:
        """Install ``policy`` and a validator built from it on this instance."""
        self._sql_validator = SQLQueryValidator(policy=policy)
        self._security_policy = policy

    def get_sql_validator(self) -> SQLQueryValidator:
        """Return the cached validator, creating it from the current policy on first use."""
        validator = self._sql_validator
        if validator is None:
            validator = SQLQueryValidator(policy=self._security_policy)
            self._sql_validator = validator
        return validator

    def validate_query(self, query: str) -> None:
        """Run the security validator over a SQL query.

        Args:
            query: Query to validate

        Raises:
            SQLSecurityError: If validation fails
        """
        validator = self.get_sql_validator()
        validator.validate(query)

    def build_secure_query(
        self,
        table: str,
        columns: list[str] | None = None,
        where: str | None = None,
        parameters: dict[str, Any] | None = None,
        allowed_tables: list[str] | None = None,
    ) -> ParameterizedQuery:
        """Assemble a parameterized SELECT through ``SecureSQLBuilder``.

        Args:
            table: Table name
            columns: Columns to select
            where: WHERE clause with :param placeholders
            parameters: Parameter values
            allowed_tables: Optional table whitelist

        Returns:
            ParameterizedQuery
        """
        query_builder = SecureSQLBuilder(
            policy=self._security_policy,
            allowed_tables=allowed_tables,
        )
        query_builder.select(table, columns)

        # An empty or missing WHERE fragment adds no clause at all.
        if where:
            query_builder.where(where)

        return query_builder.build(parameters)

    def execute_secure_query(
        self,
        lf: pl.LazyFrame,
        query: ParameterizedQuery,
        table_name: str = "data",
    ) -> pl.DataFrame:
        """Render, validate and run a parameterized query against ``lf``.

        Args:
            lf: LazyFrame to query
            query: Parameterized query
            table_name: Name for table in SQL context

        Returns:
            Query result
        """
        sql_text = query.render()
        # Validation happens on the final SQL text, after parameter substitution.
        self.validate_query(sql_text)

        sql_context = pl.SQLContext()
        sql_context.register(table_name, lf)
        return sql_context.execute(sql_text).collect()
1090
+
1091
+
1092
+ # =============================================================================
1093
+ # Audit Logging
1094
+ # =============================================================================
1095
+
1096
+
1097
@dataclass
class AuditEntry:
    """A single audit log entry.

    One record per logged query attempt, as created by
    ``QueryAuditLogger.log_query``.
    """

    # When the entry was recorded.
    timestamp: datetime
    # Short fingerprint of the query text (log_query stores the first 16 hex
    # characters of its SHA-256 digest).
    query_hash: str
    # Shortened query text for display; log_query may mask quoted literals.
    query_preview: str
    # Whether the logged attempt succeeded.
    success: bool
    # Class name of the error, if one was reported.
    error_type: str | None = None
    # str() of the error, if one was reported.
    error_message: str | None = None
    # Optional identifier of the user who issued the query.
    user: str | None = None
    # Free-form additional context supplied by the caller.
    context: dict[str, Any] = field(default_factory=dict)
1109
+
1110
+
1111
+ class QueryAuditLogger:
1112
+ """Audit logger for SQL query execution.
1113
+
1114
+ Logs all query attempts for security monitoring.
1115
+
1116
+ Example:
1117
+ logger = QueryAuditLogger()
1118
+ logger.log_query("SELECT * FROM users", success=True)
1119
+
1120
+ # Get recent entries
1121
+ for entry in logger.get_recent(10):
1122
+ print(f"{entry.timestamp}: {entry.query_preview}")
1123
+
1124
+ # Export to file
1125
+ logger.export_to_file("audit.log")
1126
+ """
1127
+
1128
def __init__(
    self,
    max_entries: int = 10000,
    log_full_queries: bool = False,
    python_logger: logging.Logger | None = None,
):
    """Set up an empty in-memory audit log.

    Args:
        max_entries: Maximum entries to keep in memory
        log_full_queries: Whether to log full query text
        python_logger: Optional Python logger for external logging
    """
    # Entries are kept oldest-first; log_query appends to the end.
    self._entries: list[AuditEntry] = []
    self.python_logger = python_logger
    self.log_full_queries = log_full_queries
    self.max_entries = max_entries
1145
+
1146
def log_query(
    self,
    query: str,
    success: bool,
    error: Exception | None = None,
    user: str | None = None,
    context: dict[str, Any] | None = None,
) -> None:
    """Log a query execution attempt.

    Stores an ``AuditEntry`` holding a short hash and a preview of the
    query, drops the oldest entries beyond ``max_entries``, and forwards a
    one-line summary to ``python_logger`` when one is configured (INFO on
    success, WARNING on failure).

    Args:
        query: SQL query
        success: Whether execution succeeded
        error: Optional error that occurred
        user: Optional user identifier
        context: Optional additional context
    """
    # Short fingerprint so identical queries can be grouped without
    # storing their text.
    query_hash = hashlib.sha256(query.encode()).hexdigest()[:16]

    # Mask string values BEFORE truncating. Truncating first can cut a
    # literal in half, leaving an unterminated quote that the mask pattern
    # no longer matches, so part of the value would leak into the log.
    visible = query
    if not self.log_full_queries:
        visible = re.sub(r"'[^']*'", "'***'", visible)

    # Create preview (first 100 chars)
    preview = visible[:100] + "..." if len(visible) > 100 else visible

    entry = AuditEntry(
        timestamp=datetime.now(),
        query_hash=query_hash,
        query_preview=preview,
        success=success,
        error_type=type(error).__name__ if error else None,
        error_message=str(error) if error else None,
        user=user,
        context=context or {},
    )

    self._entries.append(entry)

    # Trim oldest entries if over limit. Deleting a prefix (rather than
    # slicing with [-max_entries:]) also works when max_entries is 0.
    overflow = len(self._entries) - self.max_entries
    if overflow > 0:
        del self._entries[:overflow]

    # Log to Python logger if configured
    if self.python_logger:
        log_level = logging.INFO if success else logging.WARNING
        self.python_logger.log(
            log_level,
            "SQL %s [%s]: %s",
            "OK" if success else "FAIL",
            query_hash,
            preview,
        )
1195
+
1196
def get_recent(self, n: int = 100) -> list[AuditEntry]:
    """Get the ``n`` most recent audit entries, oldest first.

    Args:
        n: Maximum number of entries to return.

    Returns:
        A new list with at most ``n`` entries; empty when ``n`` is zero or
        negative.
    """
    # Guard needed because [-0:] is the whole list, not an empty one.
    if n <= 0:
        return []
    return self._entries[-n:]
1199
+
1200
def get_failures(self, n: int = 100) -> list[AuditEntry]:
    """Get the ``n`` most recent failed queries, oldest first.

    Args:
        n: Maximum number of entries to return.

    Returns:
        A new list with at most ``n`` failed entries; empty when ``n`` is
        zero or negative.
    """
    # Guard needed because [-0:] is the whole list, not an empty one.
    if n <= 0:
        return []
    failures = [e for e in self._entries if not e.success]
    return failures[-n:]
1204
+
1205
def get_by_hash(self, query_hash: str) -> list[AuditEntry]:
    """Return every stored entry whose ``query_hash`` equals the given hash."""
    matching = []
    for candidate in self._entries:
        if candidate.query_hash == query_hash:
            matching.append(candidate)
    return matching
1208
+
1209
def clear(self) -> None:
    """Remove all stored entries, emptying the existing list in place."""
    del self._entries[:]
1212
+
1213
def export_to_file(self, filepath: str) -> None:
    """Export audit log to file.

    Writes one JSON object per line (JSON Lines), oldest entry first,
    overwriting any existing file at ``filepath``.

    Args:
        filepath: Output file path
    """
    import json

    # Explicit encoding so the output does not depend on the platform default.
    with open(filepath, "w", encoding="utf-8") as f:
        for entry in self._entries:
            record = {
                "timestamp": entry.timestamp.isoformat(),
                "query_hash": entry.query_hash,
                "query_preview": entry.query_preview,
                "success": entry.success,
                "error_type": entry.error_type,
                "error_message": entry.error_message,
                "user": entry.user,
                "context": entry.context,
            }
            # context is free-form: values json cannot encode natively are
            # written as their str() form instead of aborting the export
            # part-way through the file.
            f.write(json.dumps(record, default=str) + "\n")
1234
+
1235
def get_stats(self) -> dict[str, Any]:
    """Summarise the stored entries.

    Returns:
        A dict with the total number of entries, how many succeeded and
        failed, the success rate (1.0 when nothing has been logged), and the
        number of distinct query hashes.
    """
    entries = self._entries
    total = len(entries)

    successes = 0
    distinct_hashes = set()
    for entry in entries:
        if entry.success:
            successes += 1
        distinct_hashes.add(entry.query_hash)

    if total > 0:
        success_rate = successes / total
    else:
        success_rate = 1.0

    return {
        "total_queries": total,
        "successful": successes,
        "failed": total - successes,
        "success_rate": success_rate,
        "unique_queries": len(distinct_hashes),
    }