truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877) hide show
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,1041 @@
1
+ """Query Optimizer for SQL Pushdown.
2
+
3
+ This module provides query optimization capabilities including:
4
+ - Pushdown analysis: Determining which operations can be pushed to SQL
5
+ - Query optimization: Rewriting queries for better performance
6
+ - Cost estimation: Estimating the cost of different execution plans
7
+
8
+ Example:
9
+ >>> from truthound.execution.pushdown import (
10
+ ... QueryOptimizer,
11
+ ... PushdownAnalyzer,
12
+ ... QueryBuilder,
13
+ ... )
14
+ >>>
15
+ >>> query = QueryBuilder("users").select("*").where(col("age") > 18)
16
+ >>> analyzer = PushdownAnalyzer()
17
+ >>> decision = analyzer.analyze(query.build())
18
+ >>> print(decision.can_pushdown)
19
+ True
20
+ """
21
+
22
+ from __future__ import annotations
23
+
24
+ from abc import ABC, abstractmethod
25
+ from dataclasses import dataclass, field
26
+ from enum import Enum, auto
27
+ from typing import Any, Callable, Sequence, TypeVar
28
+
29
+ from truthound.execution.pushdown.ast import (
30
+ # Base
31
+ SQLNode,
32
+ SQLVisitor,
33
+ Expression,
34
+ Statement,
35
+ # Expressions
36
+ BinaryExpression,
37
+ UnaryExpression,
38
+ FunctionCall,
39
+ AggregateFunction,
40
+ WindowFunction,
41
+ CaseExpression,
42
+ InExpression,
43
+ BetweenExpression,
44
+ ExistsExpression,
45
+ SubqueryExpression,
46
+ CastExpression,
47
+ # Identifiers
48
+ Column,
49
+ Table,
50
+ Star,
51
+ Literal,
52
+ NullLiteral,
53
+ BooleanLiteral,
54
+ ArrayLiteral,
55
+ Identifier,
56
+ Alias,
57
+ # Operators
58
+ ComparisonOp,
59
+ LogicalOp,
60
+ UnaryOp,
61
+ JoinType,
62
+ # Clauses
63
+ SelectItem,
64
+ FromClause,
65
+ JoinClause,
66
+ WhereClause,
67
+ GroupByClause,
68
+ HavingClause,
69
+ OrderByClause,
70
+ OrderByItem,
71
+ LimitClause,
72
+ OffsetClause,
73
+ WindowSpec,
74
+ FrameBound,
75
+ WhenClause,
76
+ CTEClause,
77
+ # Statements
78
+ SelectStatement,
79
+ SetOperationStatement,
80
+ )
81
+ from truthound.execution.pushdown.dialects import SQLDialect, DialectConfig, DIALECT_CONFIGS
82
+
83
+
84
+ # =============================================================================
85
+ # Pushdown Capability
86
+ # =============================================================================
87
+
88
+
89
class PushdownCapability(Enum):
    """SQL features a query may need from the backend it is pushed down to.

    Members are numbered with ``auto()``, so their integer values follow
    declaration order.
    """

    # --- Core query shape ---------------------------------------------------
    BASIC_SELECT = auto()
    FILTER = auto()
    AGGREGATION = auto()
    GROUP_BY = auto()
    ORDER_BY = auto()
    LIMIT_OFFSET = auto()
    DISTINCT = auto()

    # --- Joins --------------------------------------------------------------
    INNER_JOIN = auto()
    LEFT_JOIN = auto()
    RIGHT_JOIN = auto()
    FULL_JOIN = auto()
    CROSS_JOIN = auto()

    # --- Advanced query constructs ------------------------------------------
    WINDOW_FUNCTIONS = auto()
    CTE = auto()
    SUBQUERY = auto()
    SET_OPERATIONS = auto()

    # --- Function families --------------------------------------------------
    STRING_FUNCTIONS = auto()
    DATE_FUNCTIONS = auto()
    MATH_FUNCTIONS = auto()
    JSON_FUNCTIONS = auto()
    ARRAY_FUNCTIONS = auto()
    REGEX_FUNCTIONS = auto()

    # --- Expression forms ---------------------------------------------------
    CASE_EXPRESSION = auto()
    CAST_EXPRESSION = auto()
    IN_EXPRESSION = auto()
    BETWEEN_EXPRESSION = auto()
    EXISTS_EXPRESSION = auto()

    # --- Extended grouping --------------------------------------------------
    GROUPING_SETS = auto()
    ROLLUP = auto()
    CUBE = auto()
133
+
134
+
135
# Capability sets for common database types.

# Baseline set; also the fallback PushdownAnalyzer uses for dialects that have
# no entry in DIALECT_CAPABILITIES.
STANDARD_SQL_CAPABILITIES: set[PushdownCapability] = {
    # Core query shape
    PushdownCapability.BASIC_SELECT,
    PushdownCapability.FILTER,
    PushdownCapability.AGGREGATION,
    PushdownCapability.GROUP_BY,
    PushdownCapability.ORDER_BY,
    PushdownCapability.LIMIT_OFFSET,
    PushdownCapability.DISTINCT,
    # Joins
    PushdownCapability.INNER_JOIN,
    PushdownCapability.LEFT_JOIN,
    PushdownCapability.CROSS_JOIN,
    # Function families
    PushdownCapability.STRING_FUNCTIONS,
    PushdownCapability.DATE_FUNCTIONS,
    PushdownCapability.MATH_FUNCTIONS,
    # Expression forms
    PushdownCapability.CASE_EXPRESSION,
    PushdownCapability.CAST_EXPRESSION,
    PushdownCapability.IN_EXPRESSION,
    PushdownCapability.BETWEEN_EXPRESSION,
}

# The baseline set extended with the remaining joins, advanced constructs,
# function families and grouping extensions.
FULL_SQL_CAPABILITIES: set[PushdownCapability] = STANDARD_SQL_CAPABILITIES | {
    PushdownCapability.RIGHT_JOIN,
    PushdownCapability.FULL_JOIN,
    PushdownCapability.WINDOW_FUNCTIONS,
    PushdownCapability.CTE,
    PushdownCapability.SUBQUERY,
    PushdownCapability.SET_OPERATIONS,
    PushdownCapability.JSON_FUNCTIONS,
    PushdownCapability.ARRAY_FUNCTIONS,
    PushdownCapability.REGEX_FUNCTIONS,
    PushdownCapability.EXISTS_EXPRESSION,
    PushdownCapability.GROUPING_SETS,
    PushdownCapability.ROLLUP,
    PushdownCapability.CUBE,
}

# Extras that MySQL, SQLite and Redshift all add on top of the baseline set.
_WINDOW_CTE_SUBQUERY: set[PushdownCapability] = {
    PushdownCapability.WINDOW_FUNCTIONS,
    PushdownCapability.CTE,
    PushdownCapability.SUBQUERY,
}

# Per-dialect capability sets. Dialects mapped to FULL_SQL_CAPABILITIES share
# that one set object rather than holding their own copy.
DIALECT_CAPABILITIES: dict[SQLDialect, set[PushdownCapability]] = {
    SQLDialect.POSTGRESQL: FULL_SQL_CAPABILITIES,
    SQLDialect.MYSQL: STANDARD_SQL_CAPABILITIES
    | _WINDOW_CTE_SUBQUERY
    | {
        PushdownCapability.JSON_FUNCTIONS,
        PushdownCapability.REGEX_FUNCTIONS,
        PushdownCapability.ROLLUP,
    },
    SQLDialect.SQLITE: STANDARD_SQL_CAPABILITIES | _WINDOW_CTE_SUBQUERY,
    SQLDialect.BIGQUERY: FULL_SQL_CAPABILITIES,
    SQLDialect.SNOWFLAKE: FULL_SQL_CAPABILITIES,
    SQLDialect.REDSHIFT: STANDARD_SQL_CAPABILITIES
    | _WINDOW_CTE_SUBQUERY
    | {
        PushdownCapability.SET_OPERATIONS,
        PushdownCapability.JSON_FUNCTIONS,
    },
    SQLDialect.DATABRICKS: FULL_SQL_CAPABILITIES,
    SQLDialect.ORACLE: FULL_SQL_CAPABILITIES,
    SQLDialect.SQLSERVER: FULL_SQL_CAPABILITIES - {PushdownCapability.REGEX_FUNCTIONS},
}
200
+
201
+
202
+ # =============================================================================
203
+ # Pushdown Decision
204
+ # =============================================================================
205
+
206
+
207
class PushdownReason(Enum):
    """Machine-readable reason codes attached to pushdown issues.

    Each member's value is the lowercase form of its name.
    """

    # Overall support level
    FULLY_SUPPORTED = "fully_supported"
    PARTIALLY_SUPPORTED = "partially_supported"

    # A specific construct is not supported
    UNSUPPORTED_FUNCTION = "unsupported_function"
    UNSUPPORTED_EXPRESSION = "unsupported_expression"
    UNSUPPORTED_JOIN = "unsupported_join"
    UNSUPPORTED_WINDOW = "unsupported_window"
    UNSUPPORTED_CTE = "unsupported_cte"
    UNSUPPORTED_SUBQUERY = "unsupported_subquery"

    # The backend lacks a required capability
    MISSING_CAPABILITY = "missing_capability"

    # Non-functional concerns
    PERFORMANCE_CONCERN = "performance_concern"
    DATA_SIZE_CONCERN = "data_size_concern"
221
+
222
+
223
@dataclass
class PushdownIssue:
    """A single finding recorded while analyzing a query for pushdown.

    Attributes:
        node: AST node the finding refers to, or None when it is not tied
            to a particular node.
        reason: Reason code for the finding.
        message: Human-readable description.
        severity: One of "error", "warning" or "info"; defaults to "error".
        required_capability: Capability the finding relates to, if any.
    """

    node: SQLNode | None
    reason: PushdownReason
    message: str
    # Free-form string; the values used in this module are error, warning, info.
    severity: str = "error"
    required_capability: PushdownCapability | None = None
240
+
241
+
242
@dataclass
class PushdownDecision:
    """Outcome of analyzing a statement for pushdown.

    Attributes:
        can_pushdown: True while no error-severity issue has been added
            through ``add_issue``.
        partial_pushdown: Whether partial pushdown is possible.
        issues: Issues collected during analysis.
        required_capabilities: Capabilities the statement needs.
        estimated_cost: Estimated cost (0-100, lower is better).
        recommendations: Optimization recommendations.
    """

    can_pushdown: bool
    partial_pushdown: bool = False
    issues: list[PushdownIssue] = field(default_factory=list)
    required_capabilities: set[PushdownCapability] = field(default_factory=set)
    estimated_cost: float = 0.0
    recommendations: list[str] = field(default_factory=list)

    @property
    def has_errors(self) -> bool:
        """Return True if at least one recorded issue has severity "error"."""
        for recorded in self.issues:
            if recorded.severity == "error":
                return True
        return False

    @property
    def has_warnings(self) -> bool:
        """Return True if at least one recorded issue has severity "warning"."""
        for recorded in self.issues:
            if recorded.severity == "warning":
                return True
        return False

    def add_issue(
        self,
        node: SQLNode | None,
        reason: PushdownReason,
        message: str,
        severity: str = "error",
        required_capability: PushdownCapability | None = None,
    ) -> None:
        """Record an issue; an error-severity issue also clears ``can_pushdown``.

        Args:
            node: AST node the issue refers to, or None.
            reason: Reason code for the issue.
            message: Human-readable description.
            severity: "error", "warning" or "info".
            required_capability: Capability the issue relates to, if any.
        """
        new_issue = PushdownIssue(node, reason, message, severity, required_capability)
        self.issues.append(new_issue)
        if severity == "error":
            self.can_pushdown = False

    def add_recommendation(self, recommendation: str) -> None:
        """Append a recommendation to the decision."""
        self.recommendations.append(recommendation)
290
+
291
+
292
+ # =============================================================================
293
+ # Pushdown Analyzer
294
+ # =============================================================================
295
+
296
+
297
+ class PushdownAnalyzer(SQLVisitor):
298
+ """Analyzes queries to determine pushdown feasibility.
299
+
300
+ This analyzer traverses the AST to identify:
301
+ - Required capabilities for the query
302
+ - Potential issues preventing pushdown
303
+ - Optimization opportunities
304
+
305
+ Example:
306
+ >>> analyzer = PushdownAnalyzer(SQLDialect.POSTGRESQL)
307
+ >>> decision = analyzer.analyze(query)
308
+ >>> if decision.can_pushdown:
309
+ ... print("Query can be fully pushed down")
310
+ """
311
+
312
def __init__(
    self,
    dialect: SQLDialect = SQLDialect.GENERIC,
    available_capabilities: set[PushdownCapability] | None = None,
) -> None:
    """Set up the analyzer for one dialect.

    Args:
        dialect: SQL dialect to analyze for.
        available_capabilities: Explicit capability set. When None, the
            dialect's entry in DIALECT_CAPABILITIES is used, falling back
            to STANDARD_SQL_CAPABILITIES for dialects without an entry.
    """
    if available_capabilities is None:
        available_capabilities = DIALECT_CAPABILITIES.get(
            dialect, STANDARD_SQL_CAPABILITIES
        )

    self.dialect = dialect
    self.available_capabilities = available_capabilities
    # Filled in by analyze(); None until the first analysis starts.
    self._decision: PushdownDecision | None = None
    self._function_registry: dict[str, set[SQLDialect]] = self._build_function_registry()
331
+
332
def _build_function_registry(self) -> dict[str, set[SQLDialect]]:
    """Build the map from upper-case function name to supporting dialects.

    Returns:
        A dict whose keys are upper-case function names and whose values
        are the sets of dialects registered as supporting them.
    """
    # Registered for every dialect.
    portable = {
        "COUNT", "SUM", "AVG", "MIN", "MAX",
        "COALESCE", "NULLIF",
        "UPPER", "LOWER", "LENGTH", "TRIM",
        "ROUND", "ABS", "FLOOR", "CEIL",
        "CAST",
    }
    # Registered for PostgreSQL only.
    postgres_only = {
        "ARRAY_AGG", "STRING_AGG", "JSONB_BUILD_OBJECT",
        "REGEXP_REPLACE", "REGEXP_MATCHES",
    }
    # Registered for BigQuery only.
    bigquery_only = {
        "SAFE_DIVIDE", "IFNULL", "REGEXP_CONTAINS",
        "STRUCT", "ARRAY", "UNNEST",
    }

    # Every entry gets its own set object, so entries never alias each other.
    registry: dict[str, set[SQLDialect]] = {
        name: set(SQLDialect) for name in portable
    }
    registry.update({name: {SQLDialect.POSTGRESQL} for name in postgres_only})
    registry.update({name: {SQLDialect.BIGQUERY} for name in bigquery_only})
    return registry
366
+
367
def analyze(self, statement: Statement) -> PushdownDecision:
    """Analyze a statement for pushdown feasibility.

    A fresh decision is created, the statement is traversed with this
    visitor to collect required capabilities and issues, and every
    required capability that is not available is reported as an
    error-severity MISSING_CAPABILITY issue.

    Args:
        statement: Statement to analyze.

    Returns:
        PushdownDecision with analysis results.
    """
    decision = PushdownDecision(can_pushdown=True)
    self._decision = decision
    statement.accept(self)

    missing = decision.required_capabilities - self.available_capabilities
    # Sets of enum members have no stable iteration order across runs, so
    # sort by declaration value to report the issues deterministically.
    for cap in sorted(missing, key=lambda capability: capability.value):
        decision.add_issue(
            None,
            PushdownReason.MISSING_CAPABILITY,
            f"Missing capability: {cap.name}",
            required_capability=cap,
        )

    return decision
391
+
392
+ def _require_capability(self, capability: PushdownCapability) -> None:
393
+ """Mark a capability as required."""
394
+ if self._decision:
395
+ self._decision.required_capabilities.add(capability)
396
+
397
+ def _check_function(self, func_name: str, node: SQLNode) -> None:
398
+ """Check if a function is supported."""
399
+ func_upper = func_name.upper()
400
+ if func_upper in self._function_registry:
401
+ supported_dialects = self._function_registry[func_upper]
402
+ if self.dialect not in supported_dialects and SQLDialect not in supported_dialects:
403
+ if self._decision:
404
+ self._decision.add_issue(
405
+ node,
406
+ PushdownReason.UNSUPPORTED_FUNCTION,
407
+ f"Function {func_name} may not be supported in {self.dialect.value}",
408
+ severity="warning",
409
+ )
410
+
411
+ # -------------------------------------------------------------------------
412
+ # Visitor Methods
413
+ # -------------------------------------------------------------------------
414
+
415
def visit_literal(self, node: Literal) -> Any:
    """Leaf node: a literal adds no requirements and has no children."""
    return None
417
+
418
def visit_null_literal(self, node: NullLiteral) -> Any:
    """Leaf node: a NULL literal adds no requirements."""
    return None
420
+
421
def visit_boolean_literal(self, node: BooleanLiteral) -> Any:
    """Leaf node: a boolean literal adds no requirements."""
    return None
423
+
424
def visit_array_literal(self, node: ArrayLiteral) -> Any:
    """Require ARRAY_FUNCTIONS, then analyze every element in order."""
    self._require_capability(PushdownCapability.ARRAY_FUNCTIONS)
    for element in node.elements:
        element.accept(self)
    return None
429
+
430
def visit_identifier(self, node: Identifier) -> Any:
    """Leaf node: an identifier adds no requirements."""
    return None
432
+
433
def visit_column(self, node: Column) -> Any:
    """Leaf node: a column reference adds no requirements."""
    return None
435
+
436
def visit_table(self, node: Table) -> Any:
    """Leaf node: a table reference adds no requirements."""
    return None
438
+
439
def visit_alias(self, node: Alias) -> Any:
    """Analyze the aliased expression; the alias itself adds no requirements."""
    aliased = node.expression
    aliased.accept(self)
    return None
442
+
443
def visit_star(self, node: Star) -> Any:
    """Leaf node: ``*`` adds no requirements."""
    return None
445
+
446
def visit_binary_expression(self, node: BinaryExpression) -> Any:
    """Analyze both operands, then require REGEX_FUNCTIONS for regex operators.

    Only comparison operators REGEXP and SIMILAR_TO trigger the requirement.
    """
    node.left.accept(self)
    node.right.accept(self)

    operator = node.operator
    if isinstance(operator, ComparisonOp) and operator in (
        ComparisonOp.REGEXP,
        ComparisonOp.SIMILAR_TO,
    ):
        self._require_capability(PushdownCapability.REGEX_FUNCTIONS)

    return None
456
+
457
def visit_unary_expression(self, node: UnaryExpression) -> Any:
    """Analyze the operand; the unary operator adds no requirements."""
    operand = node.operand
    operand.accept(self)
    return None
460
+
461
def visit_in_expression(self, node: InExpression) -> Any:
    """Require IN_EXPRESSION and analyze the tested expression and its values.

    When the values are a SelectStatement, SUBQUERY is required as well and
    the statement is analyzed; otherwise each listed value is analyzed.
    """
    self._require_capability(PushdownCapability.IN_EXPRESSION)
    node.expression.accept(self)

    candidates = node.values
    if not isinstance(candidates, SelectStatement):
        for candidate in candidates:
            candidate.accept(self)
        return None

    self._require_capability(PushdownCapability.SUBQUERY)
    candidates.accept(self)
    return None
471
+
472
def visit_between_expression(self, node: BetweenExpression) -> Any:
    """Require BETWEEN_EXPRESSION, then analyze the expression and both bounds."""
    self._require_capability(PushdownCapability.BETWEEN_EXPRESSION)
    tested = node.expression
    tested.accept(self)
    lower = node.low
    lower.accept(self)
    upper = node.high
    upper.accept(self)
    return None
478
+
479
def visit_exists_expression(self, node: ExistsExpression) -> Any:
    """Require EXISTS_EXPRESSION and SUBQUERY, then analyze the subquery."""
    for needed in (
        PushdownCapability.EXISTS_EXPRESSION,
        PushdownCapability.SUBQUERY,
    ):
        self._require_capability(needed)
    node.subquery.accept(self)
    return None
484
+
485
def visit_subquery_expression(self, node: SubqueryExpression) -> Any:
    """Visit a subquery expression.

    Passes ``PushdownCapability.SUBQUERY`` to ``_require_capability`` and
    then visits the wrapped subquery.

    Args:
        node: Expression whose ``subquery`` is visited.

    Returns:
        Always None.
    """
    self._require_capability(PushdownCapability.SUBQUERY)
    subquery = node.subquery
    subquery.accept(self)
    return None
489
+
490
def visit_cast_expression(self, node: CastExpression) -> Any:
    """Visit a CAST expression.

    Passes ``PushdownCapability.CAST_EXPRESSION`` to ``_require_capability``
    and then visits the expression being cast.

    Args:
        node: CAST expression whose ``expression`` is visited.

    Returns:
        Always None.
    """
    self._require_capability(PushdownCapability.CAST_EXPRESSION)
    cast_target = node.expression
    cast_target.accept(self)
    return None
494
+
495
def visit_when_clause(self, node: WhenClause) -> Any:
    """Visit a WHEN clause: its condition first, then its result.

    Args:
        node: WHEN clause with ``condition`` and ``result``.

    Returns:
        Always None.
    """
    condition = node.condition
    condition.accept(self)
    result = node.result
    result.accept(self)
    return None
499
+
500
def visit_case_expression(self, node: CaseExpression) -> Any:
    """Visit a CASE expression.

    Passes ``PushdownCapability.CASE_EXPRESSION`` to ``_require_capability``,
    then visits the operand (if truthy), every WHEN clause in order, and the
    ELSE result (if truthy).

    Args:
        node: CASE expression with ``operand``, ``when_clauses`` and
            ``else_result``.

    Returns:
        Always None.
    """
    self._require_capability(PushdownCapability.CASE_EXPRESSION)

    if node.operand:
        node.operand.accept(self)

    for branch in node.when_clauses:
        branch.accept(self)

    if node.else_result:
        node.else_result.accept(self)

    return None
509
+
510
def visit_function_call(self, node: FunctionCall) -> Any:
    """Visit a function call.

    Hands the function name and node to ``_check_function``, visits each
    argument in order, then visits the FILTER clause when one is present.

    Args:
        node: Function call with ``name``, ``arguments`` and
            ``filter_clause``.

    Returns:
        Always None.
    """
    self._check_function(node.name, node)

    for argument in node.arguments:
        argument.accept(self)

    if node.filter_clause:
        node.filter_clause.accept(self)

    return None
517
+
518
def visit_aggregate_function(self, node: AggregateFunction) -> Any:
    """Visit an aggregate function.

    Passes ``PushdownCapability.AGGREGATION`` to ``_require_capability``,
    hands the function name and node to ``_check_function``, then visits the
    argument, the FILTER clause and each ORDER BY item, skipping whichever
    of those are falsy.

    Args:
        node: Aggregate with ``name``, ``argument``, ``filter_clause`` and
            ``order_by``.

    Returns:
        Always None.
    """
    self._require_capability(PushdownCapability.AGGREGATION)
    self._check_function(node.name, node)

    if node.argument:
        node.argument.accept(self)

    if node.filter_clause:
        node.filter_clause.accept(self)

    # A missing/empty ORDER BY contributes nothing to traverse.
    for ordering in node.order_by or ():
        ordering.accept(self)

    return None
529
+
530
def visit_frame_bound(self, node: FrameBound) -> Any:
    """Visit a window frame bound.

    Nothing is traversed or checked for this node.

    Args:
        node: The frame bound node (unused).

    Returns:
        Always None.
    """
    return None
532
+
533
def visit_window_spec(self, node: WindowSpec) -> Any:
    """Visit a window specification.

    Visits, in order: every PARTITION BY expression, every ORDER BY item,
    the frame start and the frame end. Parts that are falsy are skipped.

    Args:
        node: Window spec with ``partition_by``, ``order_by``,
            ``frame_start`` and ``frame_end``.

    Returns:
        Always None.
    """
    for partition_expr in node.partition_by or ():
        partition_expr.accept(self)

    for order_item in node.order_by or ():
        order_item.accept(self)

    for bound in (node.frame_start, node.frame_end):
        if bound:
            bound.accept(self)

    return None
545
+
546
def visit_window_function(self, node: WindowFunction) -> Any:
    """Visit a window function.

    Passes ``PushdownCapability.WINDOW_FUNCTIONS`` to
    ``_require_capability``, visits the wrapped function, and visits the
    window specification only when it is a ``WindowSpec`` instance.

    Args:
        node: Window function with ``function`` and ``window_spec``.

    Returns:
        Always None.
    """
    self._require_capability(PushdownCapability.WINDOW_FUNCTIONS)
    node.function.accept(self)

    spec = node.window_spec
    # Anything that is not a WindowSpec instance is not traversed.
    if isinstance(spec, WindowSpec):
        spec.accept(self)
    return None
552
+
553
def visit_select_item(self, node: SelectItem) -> Any:
    """Visit a select-list item by descending into its expression.

    Args:
        node: Select item whose ``expression`` is visited.

    Returns:
        Always None.
    """
    selected = node.expression
    selected.accept(self)
    return None
556
+
557
def visit_from_clause(self, node: FromClause) -> Any:
    """Visit a FROM clause by descending into its source.

    Args:
        node: FROM clause whose ``source`` is visited.

    Returns:
        Always None.
    """
    source = node.source
    source.accept(self)
    return None
560
+
561
def visit_join_clause(self, node: JoinClause) -> Any:
    """Visit a JOIN clause.

    Visits the left and right sides, passes the capability mapped from the
    join type (if any) to ``_require_capability``, and finally visits the
    join condition when one is present.

    Args:
        node: JOIN clause with ``left``, ``right``, ``join_type`` and
            ``condition``.

    Returns:
        Always None.
    """
    node.left.accept(self)
    node.right.accept(self)

    # OUTER spellings share a capability with their short forms.
    required_by_join_type = {
        JoinType.INNER: PushdownCapability.INNER_JOIN,
        JoinType.LEFT: PushdownCapability.LEFT_JOIN,
        JoinType.LEFT_OUTER: PushdownCapability.LEFT_JOIN,
        JoinType.RIGHT: PushdownCapability.RIGHT_JOIN,
        JoinType.RIGHT_OUTER: PushdownCapability.RIGHT_JOIN,
        JoinType.FULL: PushdownCapability.FULL_JOIN,
        JoinType.FULL_OUTER: PushdownCapability.FULL_JOIN,
        JoinType.CROSS: PushdownCapability.CROSS_JOIN,
    }

    # Join types missing from the table require nothing.
    required = required_by_join_type.get(node.join_type)
    if required:
        self._require_capability(required)

    if node.condition:
        node.condition.accept(self)

    return None
585
+
586
def visit_where_clause(self, node: WhereClause) -> Any:
    """Visit a WHERE clause.

    Passes ``PushdownCapability.FILTER`` to ``_require_capability`` and then
    visits the filter condition.

    Args:
        node: WHERE clause whose ``condition`` is visited.

    Returns:
        Always None.
    """
    self._require_capability(PushdownCapability.FILTER)
    predicate = node.condition
    predicate.accept(self)
    return None
590
+
591
def visit_group_by_clause(self, node: GroupByClause) -> Any:
    """Visit a GROUP BY clause.

    Passes ``PushdownCapability.GROUP_BY`` to ``_require_capability`` and
    visits each grouping expression. ``ROLLUP``, ``CUBE`` and
    ``GROUPING_SETS`` are additionally required when the corresponding
    ``with_rollup``, ``with_cube`` or ``grouping_sets`` attribute is truthy.

    Args:
        node: GROUP BY clause.

    Returns:
        Always None.
    """
    self._require_capability(PushdownCapability.GROUP_BY)

    for grouping_expr in node.expressions:
        grouping_expr.accept(self)

    # Grouping extensions each need their own capability.
    if node.with_rollup:
        self._require_capability(PushdownCapability.ROLLUP)

    if node.with_cube:
        self._require_capability(PushdownCapability.CUBE)

    if node.grouping_sets:
        self._require_capability(PushdownCapability.GROUPING_SETS)

    return None
605
+
606
def visit_having_clause(self, node: HavingClause) -> Any:
    """Visit a HAVING clause.

    Passes ``PushdownCapability.GROUP_BY`` to ``_require_capability`` (the
    same capability used for GROUP BY) and then visits the condition.

    Args:
        node: HAVING clause whose ``condition`` is visited.

    Returns:
        Always None.
    """
    self._require_capability(PushdownCapability.GROUP_BY)
    predicate = node.condition
    predicate.accept(self)
    return None
610
+
611
def visit_order_by_item(self, node: OrderByItem) -> Any:
    """Visit an ORDER BY item by descending into its expression.

    Args:
        node: ORDER BY item whose ``expression`` is visited.

    Returns:
        Always None.
    """
    sort_key = node.expression
    sort_key.accept(self)
    return None
614
+
615
def visit_order_by_clause(self, node: OrderByClause) -> Any:
    """Visit an ORDER BY clause.

    Passes ``PushdownCapability.ORDER_BY`` to ``_require_capability`` and
    then visits each item of the clause in order.

    Args:
        node: ORDER BY clause whose ``items`` are visited.

    Returns:
        Always None.
    """
    self._require_capability(PushdownCapability.ORDER_BY)
    for order_item in node.items:
        order_item.accept(self)
    return None
620
+
621
def visit_limit_clause(self, node: LimitClause) -> Any:
    """Visit a LIMIT clause.

    Passes ``PushdownCapability.LIMIT_OFFSET`` to ``_require_capability``.
    The count is visited only when it is an ``Expression`` instance.

    Args:
        node: LIMIT clause with a ``count`` attribute.

    Returns:
        Always None.
    """
    self._require_capability(PushdownCapability.LIMIT_OFFSET)
    count = node.count
    if isinstance(count, Expression):
        count.accept(self)
    return None
626
+
627
def visit_offset_clause(self, node: OffsetClause) -> Any:
    """Visit an OFFSET clause.

    Passes ``PushdownCapability.LIMIT_OFFSET`` to ``_require_capability``.
    The offset is visited only when it is an ``Expression`` instance.

    Args:
        node: OFFSET clause with an ``offset`` attribute.

    Returns:
        Always None.
    """
    self._require_capability(PushdownCapability.LIMIT_OFFSET)
    offset = node.offset
    if isinstance(offset, Expression):
        offset.accept(self)
    return None
632
+
633
def visit_cte_clause(self, node: CTEClause) -> Any:
    """Visit a common table expression.

    Passes ``PushdownCapability.CTE`` to ``_require_capability`` and then
    visits the CTE's query.

    Args:
        node: CTE clause whose ``query`` is visited.

    Returns:
        Always None.
    """
    self._require_capability(PushdownCapability.CTE)
    cte_query = node.query
    cte_query.accept(self)
    return None
637
+
638
def visit_select_statement(self, node: SelectStatement) -> Any:
    """Visit a SELECT statement and everything nested inside it.

    Passes ``PushdownCapability.BASIC_SELECT`` (and ``DISTINCT`` when
    ``node.distinct`` is truthy) to ``_require_capability``, then visits, in
    order: the CTEs, the select items, and the FROM, WHERE, GROUP BY,
    HAVING, ORDER BY, LIMIT and OFFSET clauses. Clauses that are falsy are
    skipped.

    Args:
        node: SELECT statement to traverse.

    Returns:
        Always None.
    """
    self._require_capability(PushdownCapability.BASIC_SELECT)

    if node.distinct:
        self._require_capability(PushdownCapability.DISTINCT)

    for cte in node.ctes or ():
        cte.accept(self)

    # Every select item is visited the same way, so no type dispatch is
    # needed here.
    for item in node.select_items:
        item.accept(self)

    optional_clauses = (
        node.from_clause,
        node.where_clause,
        node.group_by_clause,
        node.having_clause,
        node.order_by_clause,
        node.limit_clause,
        node.offset_clause,
    )
    for clause in optional_clauses:
        if clause:
            clause.accept(self)

    return None
670
+
671
def visit_set_operation(self, node: SetOperationStatement) -> Any:
    """Visit a set operation statement.

    Passes ``PushdownCapability.SET_OPERATIONS`` to ``_require_capability``
    and then visits the left and right operands, in that order.

    Args:
        node: Set operation with ``left`` and ``right`` operands.

    Returns:
        Always None.
    """
    self._require_capability(PushdownCapability.SET_OPERATIONS)
    left_operand = node.left
    left_operand.accept(self)
    right_operand = node.right
    right_operand.accept(self)
    return None
676
+
677
+
678
+ # =============================================================================
679
+ # Optimization Rules
680
+ # =============================================================================
681
+
682
+
683
class OptimizationRule(ABC):
    """Abstract interface for query optimization rules.

    A rule decides whether it is applicable to a node (``applies``) and
    produces a rewritten node (``transform``). Subclasses must provide
    ``name``, ``applies`` and ``transform``; ``priority`` is optional.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """Name identifying the rule."""

    @property
    def priority(self) -> int:
        """Rule priority; higher values run first. Defaults to 0."""
        return 0

    @abstractmethod
    def applies(self, node: SQLNode) -> bool:
        """Report whether this rule is applicable to ``node``.

        Args:
            node: Node to inspect.

        Returns:
            True when the rule applies.
        """

    @abstractmethod
    def transform(self, node: SQLNode) -> SQLNode:
        """Rewrite ``node`` according to this rule.

        Args:
            node: Node to rewrite.

        Returns:
            The rewritten node.
        """
724
+
725
+
726
class PredicatePushdownRule(OptimizationRule):
    """Rule intended to move WHERE predicates closer to the data source.

    The rule matches SELECT statements that filter over a join or a
    subquery. The transformation itself is not implemented yet: matched
    nodes are returned unchanged.
    """

    @property
    def name(self) -> str:
        """Name identifying the rule: ``"predicate_pushdown"``."""
        return "predicate_pushdown"

    @property
    def priority(self) -> int:
        """Priority of this rule (100)."""
        return 100

    def applies(self, node: SQLNode) -> bool:
        """Report whether ``node`` is a filtered SELECT over a join/subquery.

        Args:
            node: Node to inspect.

        Returns:
            True when ``node`` is a ``SelectStatement`` that has both a WHERE
            and a FROM clause, and the FROM source is a ``JoinClause`` or a
            ``SelectStatement``.
        """
        if not isinstance(node, SelectStatement):
            return False
        if node.where_clause is None or node.from_clause is None:
            return False
        return isinstance(node.from_clause.source, (JoinClause, SelectStatement))

    def transform(self, node: SQLNode) -> SQLNode:
        """Return ``node`` unchanged.

        Placeholder: a real implementation would analyze the predicates and
        push them into subqueries.
        """
        return node
755
+
756
+
757
class ProjectionPushdownRule(OptimizationRule):
    """Rule intended to push SELECT column lists into subqueries.

    The goal is to reduce the number of columns fetched. The transformation
    itself is not implemented yet: matched nodes are returned unchanged.
    """

    @property
    def name(self) -> str:
        """Name identifying the rule: ``"projection_pushdown"``."""
        return "projection_pushdown"

    @property
    def priority(self) -> int:
        """Priority of this rule (90)."""
        return 90

    def applies(self, node: SQLNode) -> bool:
        """Report whether ``node`` selects from a subquery that uses ``*``.

        Args:
            node: Node to inspect.

        Returns:
            True when ``node`` is a ``SelectStatement`` whose FROM source is
            itself a ``SelectStatement`` with at least one select item that
            is a ``Star`` or a ``SelectItem`` wrapping a ``Star``.
        """
        if not isinstance(node, SelectStatement):
            return False

        from_clause = node.from_clause
        if from_clause is None or not isinstance(from_clause.source, SelectStatement):
            return False

        # Look for a star projection in the subquery's select list.
        for item in from_clause.source.select_items:
            if isinstance(item, Star):
                return True
            if isinstance(item, SelectItem) and isinstance(item.expression, Star):
                return True
        return False

    def transform(self, node: SQLNode) -> SQLNode:
        """Return ``node`` unchanged (no rewrite is implemented)."""
        return node
789
+
790
+
791
class ConstantFoldingRule(OptimizationRule):
    """Rule intended to fold constant expressions such as `1 + 1` into `2`.

    Only the matching logic exists so far; the transformation returns the
    node unchanged.
    """

    @property
    def name(self) -> str:
        """Name identifying the rule: ``"constant_folding"``."""
        return "constant_folding"

    @property
    def priority(self) -> int:
        """Priority of this rule (50)."""
        return 50

    def applies(self, node: SQLNode) -> bool:
        """Report whether ``node`` is a binary expression over two literals.

        Args:
            node: Node to inspect.

        Returns:
            True when ``node`` is a ``BinaryExpression`` whose ``left`` and
            ``right`` operands are both ``Literal`` instances.
        """
        if not isinstance(node, BinaryExpression):
            return False
        return isinstance(node.left, Literal) and isinstance(node.right, Literal)

    def transform(self, node: SQLNode) -> SQLNode:
        """Return ``node`` unchanged.

        Placeholder: literal-only binary expressions could be evaluated
        here, but no folding is performed yet.
        """
        return node
819
+
820
+
821
+ # =============================================================================
822
+ # Query Optimizer
823
+ # =============================================================================
824
+
825
+
826
class QueryOptimizer:
    """Optimizes SQL queries for better performance.

    The optimizer applies a series of transformation rules to
    rewrite queries into more efficient forms. Rules are kept sorted by
    descending ``priority`` and are applied to the root statement.

    Example:
        >>> optimizer = QueryOptimizer()
        >>> optimized = optimizer.optimize(query)
    """

    def __init__(
        self,
        rules: Sequence[OptimizationRule] | None = None,
        dialect: SQLDialect = SQLDialect.GENERIC,
    ) -> None:
        """Initialize optimizer.

        Args:
            rules: Custom optimization rules. If None, uses default rules.
                An explicitly empty sequence means "no rules".
            dialect: SQL dialect for dialect-specific optimizations.
        """
        self.dialect = dialect
        # Compare against None explicitly: a plain truthiness test would
        # silently replace an intentionally empty rule list with the defaults.
        self._rules = list(rules) if rules is not None else self._default_rules()
        # Sort by priority (descending)
        self._rules.sort(key=lambda r: r.priority, reverse=True)

    def _default_rules(self) -> list[OptimizationRule]:
        """Get default optimization rules."""
        return [
            PredicatePushdownRule(),
            ProjectionPushdownRule(),
            ConstantFoldingRule(),
        ]

    def add_rule(self, rule: OptimizationRule) -> None:
        """Add an optimization rule and restore priority ordering.

        Args:
            rule: Rule to add.
        """
        self._rules.append(rule)
        self._rules.sort(key=lambda r: r.priority, reverse=True)

    def optimize(
        self,
        statement: Statement,
        max_iterations: int = 10,
    ) -> Statement:
        """Optimize a statement.

        Each iteration offers the current statement to every rule in
        priority order. Iteration stops early once a full pass produces no
        new statement object.

        Args:
            statement: Statement to optimize.
            max_iterations: Maximum optimization iterations.

        Returns:
            Optimized statement.
        """
        current = statement
        for _ in range(max_iterations):
            changed = False
            for rule in self._rules:
                if rule.applies(current):
                    new_statement = rule.transform(current)
                    # Rules signal "no change" by returning the same object.
                    if new_statement is not current:
                        current = new_statement
                        changed = True
            if not changed:
                break
        return current

    def analyze_and_optimize(
        self,
        statement: Statement,
    ) -> tuple[Statement, PushdownDecision]:
        """Analyze and optimize a statement.

        The statement is analyzed with a ``PushdownAnalyzer`` for this
        optimizer's dialect; optimization only runs when the resulting
        decision reports ``can_pushdown``.

        Args:
            statement: Statement to process.

        Returns:
            Tuple of (optimized statement, pushdown decision). The original
            statement is returned when pushdown is not possible.
        """
        # First analyze
        analyzer = PushdownAnalyzer(self.dialect)
        decision = analyzer.analyze(statement)

        # Then optimize if pushdown is possible
        if decision.can_pushdown:
            optimized = self.optimize(statement)
            return optimized, decision

        return statement, decision
919
+
920
+
921
+ # =============================================================================
922
+ # Cost Estimator
923
+ # =============================================================================
924
+
925
+
926
@dataclass
class CostEstimate:
    """Estimated cost of a query.

    When ``total_cost`` is left at 0.0 it is filled in after construction as
    the sum of the CPU, I/O and network costs.

    Attributes:
        rows: Estimated number of rows.
        cpu_cost: Estimated CPU cost (arbitrary units).
        io_cost: Estimated I/O cost (arbitrary units).
        network_cost: Estimated network cost (arbitrary units).
        total_cost: Total estimated cost.
        confidence: Confidence level (0-1).
    """

    rows: int = 0
    cpu_cost: float = 0.0
    io_cost: float = 0.0
    network_cost: float = 0.0
    total_cost: float = 0.0
    confidence: float = 0.0

    def __post_init__(self) -> None:
        """Derive ``total_cost`` from the component costs when it is 0.0."""
        if self.total_cost != 0.0:
            # An explicit total was supplied; keep it.
            return
        self.total_cost = self.cpu_cost + self.io_cost + self.network_cost
949
+
950
+
951
class CostEstimator:
    """Estimates the cost of query execution.

    This is a simple heuristic-based cost estimator that provides
    rough cost estimates for query planning purposes.
    """

    def __init__(
        self,
        default_table_rows: int = 10000,
        default_selectivity: float = 0.1,
    ) -> None:
        """Initialize cost estimator.

        Args:
            default_table_rows: Default estimated rows for unknown tables.
            default_selectivity: Default filter selectivity.
        """
        self.default_table_rows = default_table_rows
        self.default_selectivity = default_selectivity
        # Known row counts, keyed by table name.
        self._table_stats: dict[str, int] = {}

    def set_table_stats(self, table: str, rows: int) -> None:
        """Set known table statistics.

        Args:
            table: Table name.
            rows: Number of rows.
        """
        self._table_stats[table] = rows

    def estimate(self, statement: Statement) -> CostEstimate:
        """Estimate the cost of a statement.

        Args:
            statement: Statement to estimate.

        Returns:
            CostEstimate. Only ``SelectStatement`` is supported; any other
            statement yields an estimate with ``confidence=0.0``.
        """
        if isinstance(statement, SelectStatement):
            return self._estimate_select(statement)
        return CostEstimate(confidence=0.0)

    def _estimate_select(self, statement: SelectStatement) -> CostEstimate:
        """Estimate cost of SELECT statement.

        Args:
            statement: SELECT statement to estimate.

        Returns:
            CostEstimate with ``confidence=0.5``.
        """
        # Start with base row estimate
        rows = self.default_table_rows

        # Try to get actual table size
        if statement.from_clause:
            source = statement.from_clause.source
            if isinstance(source, Table):
                rows = self._table_stats.get(source.name, self.default_table_rows)

        # Apply selectivity for WHERE clause
        if statement.where_clause:
            rows = int(rows * self.default_selectivity)

        # Estimate costs
        cpu_cost = rows * 0.01  # Simple scan cost
        io_cost = rows * 0.001  # I/O per row

        # Aggregation increases CPU cost
        if statement.group_by_clause:
            cpu_cost *= 2

        # Sorting increases CPU cost: O(n log n). The bit length of the row
        # count approximates log2(n); non-positive counts use a factor of 1.
        if statement.order_by_clause:
            log_factor = int(rows).bit_length() if rows > 0 else 1
            cpu_cost += rows * 0.05 * log_factor

        # Window functions are expensive
        has_windows = any(
            isinstance(item, SelectItem) and isinstance(item.expression, WindowFunction)
            for item in statement.select_items
        )
        if has_windows:
            cpu_cost *= 3

        # Apply LIMIT (only literal integer counts can cap the row estimate)
        if statement.limit_clause:
            if isinstance(statement.limit_clause.count, int):
                rows = min(rows, statement.limit_clause.count)

        return CostEstimate(
            rows=rows,
            cpu_cost=cpu_cost,
            io_cost=io_cost,
            network_cost=rows * 0.0001,
            confidence=0.5,  # Low confidence for heuristic estimates
        )