truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877) hide show
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,1007 @@
1
+ """Profile comparison and drift detection.
2
+
3
+ This module provides tools for comparing profiles across time
4
+ to detect data drift, schema changes, and quality degradation.
5
+
6
+ Key features:
7
+ - Column-level comparison
8
+ - Statistical drift detection
9
+ - Schema change detection
10
+ - Configurable alerting thresholds
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ from abc import ABC, abstractmethod
16
+ from dataclasses import dataclass, field
17
+ from datetime import datetime
18
+ from enum import Enum
19
+ from typing import Any, Callable, Iterator, Sequence
20
+
21
+ from truthound.profiler.base import (
22
+ ColumnProfile,
23
+ DataType,
24
+ DistributionStats,
25
+ TableProfile,
26
+ )
27
+
28
+
29
+ # =============================================================================
30
+ # Drift Types and Severity
31
+ # =============================================================================
32
+
33
+
34
class DriftType(str, Enum):
    """Categories of drift that a profile comparison can report.

    The class mixes in ``str``, so every member compares equal to its
    lowercase string value.
    """

    # A column was added, removed, or changed type.
    SCHEMA = "schema"
    # The statistical distribution changed.
    DISTRIBUTION = "distribution"
    # The null ratio changed significantly.
    COMPLETENESS = "completeness"
    # The unique ratio changed.
    UNIQUENESS = "uniqueness"
    # The pattern match ratio changed.
    PATTERN = "pattern"
    # The value range changed.
    RANGE = "range"
    # The distinct count changed.
    CARDINALITY = "cardinality"
44
+
45
+
46
class DriftSeverity(str, Enum):
    """How serious a detected drift is, from informational to critical.

    The class mixes in ``str``, so every member compares equal to its
    lowercase string value.
    """

    # Minor change, informational only.
    INFO = "info"
    # Notable change that may need attention.
    WARNING = "warning"
    # Significant change that likely requires action.
    CRITICAL = "critical"
52
+
53
+
54
class ChangeDirection(str, Enum):
    """Which way a metric moved between the old and the new profile.

    The class mixes in ``str``, so every member compares equal to its
    lowercase string value.
    """

    # The new value is higher than the old one.
    INCREASED = "increased"
    # The new value is lower than the old one.
    DECREASED = "decreased"
    # No movement was recorded.
    UNCHANGED = "unchanged"
60
+
61
+
62
+ # =============================================================================
63
+ # Drift Detection Results
64
+ # =============================================================================
65
+
66
+
67
@dataclass(frozen=True)
class DriftResult:
    """Immutable record describing a single detected drift.

    Attributes:
        drift_type: Type of drift detected.
        severity: Severity level.
        column: Column name (None for table-level drifts).
        metric: Specific metric that changed.
        old_value: Previous value.
        new_value: Current value.
        change_ratio: Size of the change. Originally documented as the
            relative change ``(new - old) / old``.
            NOTE(review): the completeness and uniqueness detectors in this
            module store the absolute difference here - confirm the intended
            meaning.
        direction: Direction of change.
        message: Human-readable description.
        threshold: Threshold that was exceeded.
    """

    drift_type: DriftType
    severity: DriftSeverity
    column: str | None
    metric: str
    old_value: Any
    new_value: Any
    change_ratio: float | None = None
    direction: ChangeDirection = ChangeDirection.UNCHANGED
    message: str = ""
    threshold: float | None = None

    def to_dict(self) -> dict[str, Any]:
        """Convert to a dictionary suitable for serialization.

        Enum fields are emitted as their ``.value``; ``old_value`` and
        ``new_value`` are passed through ``_serialize_value``.

        Returns:
            A dictionary with one entry per dataclass field.
        """
        return {
            "drift_type": self.drift_type.value,
            "severity": self.severity.value,
            "column": self.column,
            "metric": self.metric,
            "old_value": self._serialize_value(self.old_value),
            "new_value": self._serialize_value(self.new_value),
            "change_ratio": self.change_ratio,
            "direction": self.direction.value,
            "message": self.message,
            "threshold": self.threshold,
        }

    def _serialize_value(self, value: Any) -> Any:
        """Convert a single value into a JSON-friendly form.

        Args:
            value: Arbitrary metric value.

        Returns:
            The ISO-8601 string for ``datetime``/``date``/``time`` values,
            the ``.value`` for enum members, and the input unchanged
            otherwise.
        """
        # Imported locally because the module only imports ``datetime`` at
        # file level; this keeps the block self-contained.
        from datetime import date, time

        # ``datetime`` is a subclass of ``date``, so this single check keeps
        # the previous datetime behavior and additionally covers plain
        # dates and times, which previously leaked through un-serialized.
        if isinstance(value, (date, time)):
            return value.isoformat()
        if isinstance(value, Enum):
            return value.value
        return value
117
+
118
+
119
@dataclass(frozen=True)
class ColumnComparison:
    """Outcome of comparing one column between the old and the new profile.

    Records whether the column is present on each side and the drifts that
    were detected for it.
    """

    column_name: str
    exists_in_old: bool
    exists_in_new: bool
    drifts: tuple[DriftResult, ...] = field(default_factory=tuple)

    @property
    def has_drift(self) -> bool:
        """Whether at least one drift was recorded for this column."""
        return len(self.drifts) != 0

    @property
    def max_severity(self) -> DriftSeverity | None:
        """Highest severity among the recorded drifts, or None without drifts."""
        if not self.drifts:
            return None

        # Rank the levels from least (0) to most (2) severe.
        ascending = (
            DriftSeverity.INFO,
            DriftSeverity.WARNING,
            DriftSeverity.CRITICAL,
        )
        rank = {level: position for position, level in enumerate(ascending)}
        worst = max(self.drifts, key=lambda drift: rank[drift.severity])
        return worst.severity

    @property
    def is_new(self) -> bool:
        """Whether the column appears only in the new profile."""
        return not self.exists_in_old and self.exists_in_new

    @property
    def is_removed(self) -> bool:
        """Whether the column appears only in the old profile."""
        return self.exists_in_old and not self.exists_in_new

    def to_dict(self) -> dict[str, Any]:
        """Convert to a dictionary, including the derived drift flags."""
        worst = self.max_severity
        serialized_drifts = [drift.to_dict() for drift in self.drifts]
        return {
            "column_name": self.column_name,
            "exists_in_old": self.exists_in_old,
            "exists_in_new": self.exists_in_new,
            "drifts": serialized_drifts,
            "has_drift": self.has_drift,
            "max_severity": worst.value if worst else None,
        }
165
+
166
+
167
@dataclass(frozen=True)
class ProfileComparison:
    """Full result of comparing an old profile against a new one.

    Holds the per-column comparisons plus any table-level drifts, and
    offers helpers to query, summarize, serialize and pretty-print them.
    Iterating the object yields its column comparisons and ``len()`` gives
    their count.
    """

    old_profile_name: str
    new_profile_name: str
    old_profiled_at: datetime
    new_profiled_at: datetime
    columns: tuple[ColumnComparison, ...] = field(default_factory=tuple)
    table_drifts: tuple[DriftResult, ...] = field(default_factory=tuple)
    comparison_timestamp: datetime = field(default_factory=datetime.now)

    def __iter__(self) -> Iterator[ColumnComparison]:
        """Yield the column comparisons in stored order."""
        return iter(self.columns)

    def __len__(self) -> int:
        """Return how many columns were compared."""
        return len(self.columns)

    @property
    def has_drift(self) -> bool:
        """Whether any table-level or column-level drift was detected."""
        if self.table_drifts:
            return True
        for column in self.columns:
            if column.has_drift:
                return True
        return False

    @property
    def has_schema_changes(self) -> bool:
        """Whether any column was added or removed."""
        for column in self.columns:
            if column.is_new or column.is_removed:
                return True
        return False

    @property
    def drift_count(self) -> int:
        """Total number of drifts across the table and all columns."""
        total = len(self.table_drifts)
        for column in self.columns:
            total += len(column.drifts)
        return total

    @property
    def all_drifts(self) -> list[DriftResult]:
        """All drifts as one flat list: table-level first, then per column."""
        flattened = list(self.table_drifts)
        flattened += [
            drift for column in self.columns for drift in column.drifts
        ]
        return flattened

    def get_by_severity(self, severity: DriftSeverity) -> list[DriftResult]:
        """Return every drift whose severity equals ``severity``."""
        return list(filter(lambda d: d.severity == severity, self.all_drifts))

    def get_by_type(self, drift_type: DriftType) -> list[DriftResult]:
        """Return every drift whose type equals ``drift_type``."""
        return list(filter(lambda d: d.drift_type == drift_type, self.all_drifts))

    def get_column(self, name: str) -> ColumnComparison | None:
        """Return the first comparison for column ``name``, or None."""
        matches = (col for col in self.columns if col.column_name == name)
        return next(matches, None)

    def to_dict(self) -> dict[str, Any]:
        """Convert to a dictionary for serialization.

        Timestamps are rendered with ``isoformat()``; columns and table
        drifts are serialized through their own ``to_dict`` methods.
        """
        column_dicts = [column.to_dict() for column in self.columns]
        table_dicts = [drift.to_dict() for drift in self.table_drifts]
        return {
            "old_profile_name": self.old_profile_name,
            "new_profile_name": self.new_profile_name,
            "old_profiled_at": self.old_profiled_at.isoformat(),
            "new_profiled_at": self.new_profiled_at.isoformat(),
            "comparison_timestamp": self.comparison_timestamp.isoformat(),
            "has_drift": self.has_drift,
            "has_schema_changes": self.has_schema_changes,
            "drift_count": self.drift_count,
            "summary": self._build_summary(),
            "columns": column_dicts,
            "table_drifts": table_dicts,
        }

    def _build_summary(self) -> dict[str, Any]:
        """Count drifts per severity and type, plus column-level tallies."""
        severity_counts = {}
        for level in DriftSeverity:
            severity_counts[level.value] = len(self.get_by_severity(level))

        type_counts = {}
        for kind in DriftType:
            type_counts[kind.value] = len(self.get_by_type(kind))

        added = removed = drifting = 0
        for column in self.columns:
            if column.is_new:
                added += 1
            if column.is_removed:
                removed += 1
            if column.has_drift:
                drifting += 1

        return {
            "by_severity": severity_counts,
            "by_type": type_counts,
            "columns_added": added,
            "columns_removed": removed,
            "columns_with_drift": drifting,
        }

    def to_report(self) -> str:
        """Render a plain-text report.

        The report has a header, a summary of drift counts, an optional
        schema-changes section, and one section per severity (critical,
        warning, info) that has at least one drift.
        """
        heavy_rule = "=" * 60
        light_rule = "-" * 40
        report_order = (
            DriftSeverity.CRITICAL,
            DriftSeverity.WARNING,
            DriftSeverity.INFO,
        )
        # Look each severity up once; reused for the summary and the sections.
        grouped = {level: self.get_by_severity(level) for level in report_order}

        out = [
            heavy_rule,
            "PROFILE COMPARISON REPORT",
            heavy_rule,
            f"Old Profile: {self.old_profile_name} ({self.old_profiled_at})",
            f"New Profile: {self.new_profile_name} ({self.new_profiled_at})",
            f"Compared At: {self.comparison_timestamp}",
            "",
            "SUMMARY",
            light_rule,
            f"Total Drifts: {self.drift_count}",
            f" Critical: {len(grouped[DriftSeverity.CRITICAL])}",
            f" Warning: {len(grouped[DriftSeverity.WARNING])}",
            f" Info: {len(grouped[DriftSeverity.INFO])}",
            "",
        ]

        if self.has_schema_changes:
            out.append("SCHEMA CHANGES")
            out.append(light_rule)
            for column in self.columns:
                if column.is_new:
                    out.append(f" + Added: {column.column_name}")
                elif column.is_removed:
                    out.append(f" - Removed: {column.column_name}")
            out.append("")

        # One section per severity, most severe first; empty ones are skipped.
        for level in report_order:
            entries = grouped[level]
            if not entries:
                continue
            out.append(f"{level.value.upper()} DRIFTS")
            out.append(light_rule)
            for entry in entries:
                prefix = f"[{entry.column}] " if entry.column else ""
                out.append(f" {prefix}{entry.message}")
            out.append("")

        out.append(heavy_rule)
        return "\n".join(out)
311
+
312
+
313
+ # =============================================================================
314
+ # Drift Thresholds Configuration
315
+ # =============================================================================
316
+
317
+
318
@dataclass
class DriftThresholds:
    """Tunable limits that decide when a metric change counts as drift.

    Each ``*_warning`` / ``*_critical`` pair gives the change at which a
    drift is reported with that severity. Values are fractions (for
    example ``0.05`` means 5%).

    The ``strict()`` and ``loose()`` presets override only the null-ratio,
    unique-ratio, mean and std thresholds; the range, cardinality and
    pattern thresholds keep their defaults.

    Attributes:
        null_ratio_warning: Null ratio change for warning.
        null_ratio_critical: Null ratio change for critical.
        unique_ratio_warning: Unique ratio change for warning.
        unique_ratio_critical: Unique ratio change for critical.
        mean_warning: Mean change (relative) for warning.
        mean_critical: Mean change (relative) for critical.
        std_warning: Standard deviation change for warning.
        std_critical: Standard deviation change for critical.
        min_warning: Min value change for warning.
        max_warning: Max value change for warning.
        cardinality_warning: Distinct count change for warning.
        cardinality_critical: Distinct count change for critical.
        pattern_match_warning: Pattern match ratio change for warning.
    """

    # --- Completeness -----------------------------------------------------
    null_ratio_warning: float = 0.05
    null_ratio_critical: float = 0.20

    # --- Uniqueness -------------------------------------------------------
    unique_ratio_warning: float = 0.10
    unique_ratio_critical: float = 0.30

    # --- Distribution (numeric) -------------------------------------------
    mean_warning: float = 0.10
    mean_critical: float = 0.30
    std_warning: float = 0.20
    std_critical: float = 0.50

    # --- Range ------------------------------------------------------------
    min_warning: float = 0.10
    max_warning: float = 0.10

    # --- Cardinality ------------------------------------------------------
    cardinality_warning: float = 0.20
    cardinality_critical: float = 0.50

    # --- Patterns ---------------------------------------------------------
    pattern_match_warning: float = 0.10

    @classmethod
    def strict(cls) -> "DriftThresholds":
        """Build a preset with lower tolerance than the defaults."""
        overrides = {
            "null_ratio_warning": 0.02,
            "null_ratio_critical": 0.10,
            "unique_ratio_warning": 0.05,
            "unique_ratio_critical": 0.15,
            "mean_warning": 0.05,
            "mean_critical": 0.15,
            "std_warning": 0.10,
            "std_critical": 0.30,
        }
        return cls(**overrides)

    @classmethod
    def loose(cls) -> "DriftThresholds":
        """Build a preset with higher tolerance than the defaults."""
        overrides = {
            "null_ratio_warning": 0.10,
            "null_ratio_critical": 0.40,
            "unique_ratio_warning": 0.20,
            "unique_ratio_critical": 0.50,
            "mean_warning": 0.20,
            "mean_critical": 0.50,
            "std_warning": 0.30,
            "std_critical": 0.70,
        }
        return cls(**overrides)
392
+
393
+
394
+ # =============================================================================
395
+ # Drift Detectors
396
+ # =============================================================================
397
+
398
+
399
class DriftDetector(ABC):
    """Interface implemented by every drift detection strategy.

    Subclasses set ``name`` and ``drift_type`` and implement ``detect``.
    """

    # Identifier of the strategy; subclasses override it.
    name: str = "base"
    # Kind of drift the strategy reports; subclasses override it.
    drift_type: DriftType = DriftType.DISTRIBUTION

    @abstractmethod
    def detect(self, old: ColumnProfile, new: ColumnProfile, thresholds: DriftThresholds) -> list[DriftResult]:
        """Compare two column profiles and report any drift found.

        Args:
            old: Previous column profile.
            new: Current column profile.
            thresholds: Detection thresholds.

        Returns:
            List of detected drifts.
        """
423
+
424
+
425
class CompletenessDriftDetector(DriftDetector):
    """Reports drift when a column's null ratio moves past a threshold."""

    name = "completeness"
    drift_type = DriftType.COMPLETENESS

    def detect(self, old: ColumnProfile, new: ColumnProfile, thresholds: DriftThresholds) -> list[DriftResult]:
        """Compare the null ratios of two column profiles.

        The absolute difference between the two ratios is checked against
        the critical threshold first, then the warning threshold.

        Args:
            old: Previous column profile.
            new: Current column profile.
            thresholds: Detection thresholds.

        Returns:
            A one-element list describing the drift, or an empty list when
            the difference is below the warning threshold.
        """
        before = old.null_ratio
        after = new.null_ratio
        delta = abs(after - before)

        if delta >= thresholds.null_ratio_critical:
            level = DriftSeverity.CRITICAL
        elif delta >= thresholds.null_ratio_warning:
            level = DriftSeverity.WARNING
        else:
            return []

        if after > before:
            trend = ChangeDirection.INCREASED
        else:
            trend = ChangeDirection.DECREASED

        # Record the threshold that matches the reported severity.
        if level == DriftSeverity.WARNING:
            exceeded = thresholds.null_ratio_warning
        else:
            exceeded = thresholds.null_ratio_critical

        finding = DriftResult(
            drift_type=DriftType.COMPLETENESS,
            severity=level,
            column=new.name,
            metric="null_ratio",
            old_value=before,
            new_value=after,
            # Absolute difference between the ratios, not a relative change.
            change_ratio=delta,
            direction=trend,
            message=f"Null ratio {trend.value} from {before:.2%} to {after:.2%}",
            threshold=exceeded,
        )
        return [finding]
469
+
470
+
471
class UniquenessDriftDetector(DriftDetector):
    """Reports drift when a column's unique ratio moves past a threshold."""

    name = "uniqueness"
    drift_type = DriftType.UNIQUENESS

    def detect(self, old: ColumnProfile, new: ColumnProfile, thresholds: DriftThresholds) -> list[DriftResult]:
        """Compare the unique ratios of two column profiles.

        The absolute difference between the two ratios is checked against
        the critical threshold first, then the warning threshold.

        Args:
            old: Previous column profile.
            new: Current column profile.
            thresholds: Detection thresholds.

        Returns:
            A one-element list describing the drift, or an empty list when
            the difference is below the warning threshold.
        """
        before = old.unique_ratio
        after = new.unique_ratio
        delta = abs(after - before)

        if delta >= thresholds.unique_ratio_critical:
            level = DriftSeverity.CRITICAL
        elif delta >= thresholds.unique_ratio_warning:
            level = DriftSeverity.WARNING
        else:
            return []

        if after > before:
            trend = ChangeDirection.INCREASED
        else:
            trend = ChangeDirection.DECREASED

        # Record the threshold that matches the reported severity.
        if level == DriftSeverity.WARNING:
            exceeded = thresholds.unique_ratio_warning
        else:
            exceeded = thresholds.unique_ratio_critical

        finding = DriftResult(
            drift_type=DriftType.UNIQUENESS,
            severity=level,
            column=new.name,
            metric="unique_ratio",
            old_value=before,
            new_value=after,
            # Absolute difference between the ratios, not a relative change.
            change_ratio=delta,
            direction=trend,
            message=f"Unique ratio {trend.value} from {before:.2%} to {after:.2%}",
            threshold=exceeded,
        )
        return [finding]
515
+
516
+
517
class DistributionDriftDetector(DriftDetector):
    """Detects changes in statistical distribution (mean and std)."""

    name = "distribution"
    drift_type = DriftType.DISTRIBUTION

    def detect(
        self,
        old: ColumnProfile,
        new: ColumnProfile,
        thresholds: DriftThresholds,
    ) -> list[DriftResult]:
        """Check the mean and standard deviation for relative change.

        Args:
            old: Previous column profile.
            new: Current column profile.
            thresholds: Supplies ``mean_warning``/``mean_critical`` and
                ``std_warning``/``std_critical``.

        Returns:
            Up to two drifts (mean first, then std). Empty when either
            profile has no distribution, or nothing crossed a threshold.
        """
        results: list[DriftResult] = []

        old_dist = old.distribution
        new_dist = new.distribution

        if old_dist is None or new_dist is None:
            return results

        # Check mean (only when both sides actually carry the statistic)
        if old_dist.mean is not None and new_dist.mean is not None:
            mean_drift = self._check_relative_change(
                old_dist.mean,
                new_dist.mean,
                "mean",
                new.name,
                thresholds.mean_warning,
                thresholds.mean_critical,
            )
            if mean_drift is not None:
                results.append(mean_drift)

        # Check standard deviation
        if old_dist.std is not None and new_dist.std is not None:
            std_drift = self._check_relative_change(
                old_dist.std,
                new_dist.std,
                "std",
                new.name,
                thresholds.std_warning,
                thresholds.std_critical,
            )
            if std_drift is not None:
                results.append(std_drift)

        return results

    def _check_relative_change(
        self,
        old_val: float,
        new_val: float,
        metric: str,
        column: str,
        warning_threshold: float,
        critical_threshold: float,
    ) -> DriftResult | None:
        """Check for relative change in a metric.

        Args:
            old_val: Baseline value of the metric.
            new_val: Current value of the metric.
            metric: Metric name recorded on the result and used in the message.
            column: Column name recorded on the result.
            warning_threshold: Relative change that yields a WARNING.
            critical_threshold: Relative change that yields a CRITICAL.

        Returns:
            A drift result, or ``None`` when the change is below the warning
            threshold (or both values are zero).
        """
        if old_val == 0:
            # A relative change against a zero baseline is undefined, so any
            # move away from zero is reported as a warning without a ratio.
            if new_val != 0:
                # The sign of the new value decides the direction: a mean can
                # move from 0 to a negative number, which is a decrease.
                zero_direction = (
                    ChangeDirection.INCREASED if new_val > old_val
                    else ChangeDirection.DECREASED
                )
                return DriftResult(
                    drift_type=DriftType.DISTRIBUTION,
                    severity=DriftSeverity.WARNING,
                    column=column,
                    metric=metric,
                    old_value=old_val,
                    new_value=new_val,
                    change_ratio=None,
                    direction=zero_direction,
                    message=f"{metric} changed from 0 to {new_val:.4f}",
                )
            return None

        change_ratio = abs(new_val - old_val) / abs(old_val)

        # Critical is tested first so the most severe matching level wins.
        if change_ratio >= critical_threshold:
            severity = DriftSeverity.CRITICAL
        elif change_ratio >= warning_threshold:
            severity = DriftSeverity.WARNING
        else:
            return None

        direction = (
            ChangeDirection.INCREASED if new_val > old_val
            else ChangeDirection.DECREASED
        )

        return DriftResult(
            drift_type=DriftType.DISTRIBUTION,
            severity=severity,
            column=column,
            metric=metric,
            old_value=old_val,
            new_value=new_val,
            change_ratio=change_ratio,
            direction=direction,
            message=f"{metric} {direction.value} by {change_ratio:.1%} ({old_val:.4f} -> {new_val:.4f})",
            threshold=warning_threshold if severity == DriftSeverity.WARNING else critical_threshold,
        )
616
+
617
+
618
class RangeDriftDetector(DriftDetector):
    """Detects changes in value range (min/max)."""

    name = "range"
    drift_type = DriftType.RANGE

    def detect(
        self,
        old: ColumnProfile,
        new: ColumnProfile,
        thresholds: DriftThresholds,
    ) -> list[DriftResult]:
        """Check the min and max boundaries for relative change.

        Args:
            old: Previous column profile.
            new: Current column profile.
            thresholds: Supplies ``min_warning`` and ``max_warning``.

        Returns:
            Up to two drifts (min first, then max). Empty when either
            profile has no distribution, or no boundary moved enough.
        """
        results: list[DriftResult] = []

        old_dist = old.distribution
        new_dist = new.distribution

        if old_dist is None or new_dist is None:
            return results

        # Check min
        if old_dist.min is not None and new_dist.min is not None:
            min_drift = self._check_range_change(
                old_dist.min, new_dist.min,
                "min", new.name, thresholds.min_warning,
            )
            if min_drift is not None:
                results.append(min_drift)

        # Check max
        if old_dist.max is not None and new_dist.max is not None:
            max_drift = self._check_range_change(
                old_dist.max, new_dist.max,
                "max", new.name, thresholds.max_warning,
            )
            if max_drift is not None:
                results.append(max_drift)

        return results

    def _check_range_change(
        self,
        old_val: float,
        new_val: float,
        metric: str,
        column: str,
        threshold: float,
    ) -> DriftResult | None:
        """Check for range boundary changes.

        Args:
            old_val: Baseline boundary value.
            new_val: Current boundary value.
            metric: ``"min"`` or ``"max"``; recorded on the result.
            column: Column name recorded on the result.
            threshold: Relative change at which a warning is raised.

        Returns:
            A WARNING drift result, or ``None`` when the boundary did not
            move by at least ``threshold`` (relative to the old value).
        """
        if old_val == 0:
            if new_val == 0:
                return None
            # A relative change against a zero baseline is undefined. A
            # boundary leaving zero is still a range change, so it is
            # reported without a ratio instead of being silently dropped
            # (same convention as the distribution detector in this module).
            zero_direction = (
                ChangeDirection.INCREASED if new_val > old_val
                else ChangeDirection.DECREASED
            )
            return DriftResult(
                drift_type=DriftType.RANGE,
                severity=DriftSeverity.WARNING,
                column=column,
                metric=metric,
                old_value=old_val,
                new_value=new_val,
                change_ratio=None,
                direction=zero_direction,
                message=f"{metric} changed from 0 to {new_val}",
            )

        change_ratio = abs(new_val - old_val) / abs(old_val)
        if change_ratio < threshold:
            return None

        direction = (
            ChangeDirection.INCREASED if new_val > old_val
            else ChangeDirection.DECREASED
        )

        return DriftResult(
            drift_type=DriftType.RANGE,
            severity=DriftSeverity.WARNING,
            column=column,
            metric=metric,
            old_value=old_val,
            new_value=new_val,
            change_ratio=change_ratio,
            direction=direction,
            message=f"{metric} {direction.value} from {old_val} to {new_val}",
            threshold=threshold,
        )
691
+
692
+
693
class CardinalityDriftDetector(DriftDetector):
    """Flags shifts in a column's cardinality (distinct count)."""

    name = "cardinality"
    drift_type = DriftType.CARDINALITY

    def detect(
        self,
        old: ColumnProfile,
        new: ColumnProfile,
        thresholds: DriftThresholds,
    ) -> list[DriftResult]:
        """Report a drift when the distinct count changed relatively enough.

        Args:
            old: Previous column profile; its ``distinct_count`` is the baseline.
            new: Current column profile.
            thresholds: Supplies ``cardinality_warning`` and
                ``cardinality_critical``.

        Returns:
            A one-element list when the relative change reaches the warning
            or critical threshold; an empty list otherwise, including when
            the baseline distinct count is zero.
        """
        baseline = old.distinct_count
        current = new.distinct_count

        # A relative change cannot be computed against a zero baseline.
        if baseline == 0:
            return []

        relative = abs(current - baseline) / baseline

        # The critical limit is tested first so the most severe match wins;
        # the limit that was hit is recorded on the result.
        if relative >= thresholds.cardinality_critical:
            severity = DriftSeverity.CRITICAL
            limit = thresholds.cardinality_critical
        elif relative >= thresholds.cardinality_warning:
            severity = DriftSeverity.WARNING
            limit = thresholds.cardinality_warning
        else:
            return []

        if current > baseline:
            direction = ChangeDirection.INCREASED
        else:
            direction = ChangeDirection.DECREASED

        drift = DriftResult(
            drift_type=DriftType.CARDINALITY,
            severity=severity,
            column=new.name,
            metric="distinct_count",
            old_value=baseline,
            new_value=current,
            change_ratio=relative,
            direction=direction,
            message=f"Distinct count {direction.value} by {relative:.1%} ({baseline} -> {current})",
            threshold=limit,
        )
        return [drift]
741
+
742
+
743
+ # Default detectors
744
# One shared instance of each built-in detector, in the order they are run.
DEFAULT_DETECTORS: tuple[DriftDetector, ...] = tuple(
    detector_cls()
    for detector_cls in (
        CompletenessDriftDetector,
        UniquenessDriftDetector,
        DistributionDriftDetector,
        RangeDriftDetector,
        CardinalityDriftDetector,
    )
)
751
+
752
+
753
+ # =============================================================================
754
+ # Profile Comparator
755
+ # =============================================================================
756
+
757
+
758
class ProfileComparator:
    """Compares two profiles to detect drift.

    This is the main entry point for profile comparison. It orchestrates
    multiple drift detectors and builds a comprehensive comparison result.

    Example:
        comparator = ProfileComparator()
        comparison = comparator.compare(old_profile, new_profile)

        if comparison.has_drift:
            print(comparison.to_report())

        # With custom thresholds
        comparator = ProfileComparator(thresholds=DriftThresholds.strict())
    """

    # Relative row-count change at or above which a table-level warning is
    # emitted (0.5 == 50%).
    _ROW_COUNT_WARNING_RATIO = 0.5

    def __init__(
        self,
        detectors: Sequence[DriftDetector] | None = None,
        thresholds: DriftThresholds | None = None,
    ):
        """Initialize comparator.

        Args:
            detectors: Custom drift detectors (uses defaults if None). An
                explicitly empty sequence is honored and disables the
                built-in detectors.
            thresholds: Detection thresholds (a default ``DriftThresholds()``
                is created if None)
        """
        # Test against None rather than truthiness so that passing an empty
        # sequence does not silently re-enable the default detectors.
        self.detectors = (
            list(DEFAULT_DETECTORS) if detectors is None else list(detectors)
        )
        self.thresholds = thresholds if thresholds is not None else DriftThresholds()

    def add_detector(self, detector: DriftDetector) -> None:
        """Add a custom drift detector."""
        self.detectors.append(detector)

    def compare(
        self,
        old_profile: TableProfile,
        new_profile: TableProfile,
    ) -> ProfileComparison:
        """Compare two profiles.

        Args:
            old_profile: Previous/baseline profile
            new_profile: Current profile to compare

        Returns:
            Complete comparison result, with one column comparison per column
            name found in either profile (sorted by name) plus the
            table-level drifts.
        """
        # Index columns by name so added/removed columns can be spotted.
        old_columns = {col.name: col for col in old_profile.columns}
        new_columns = {col.name: col for col in new_profile.columns}

        # The union of names covers columns present on only one side.
        column_comparisons = [
            self._compare_column(
                col_name,
                old_columns.get(col_name),
                new_columns.get(col_name),
            )
            for col_name in sorted(old_columns.keys() | new_columns.keys())
        ]

        table_drifts = self._check_table_drift(old_profile, new_profile)

        return ProfileComparison(
            old_profile_name=old_profile.name,
            new_profile_name=new_profile.name,
            old_profiled_at=old_profile.profiled_at,
            new_profiled_at=new_profile.profiled_at,
            columns=tuple(column_comparisons),
            table_drifts=tuple(table_drifts),
        )

    def _compare_column(
        self,
        name: str,
        old_col: ColumnProfile | None,
        new_col: ColumnProfile | None,
    ) -> ColumnComparison:
        """Compare a single column.

        Args:
            name: Column name.
            old_col: Baseline profile, or None when the column is new.
            new_col: Current profile, or None when the column was removed.

        Returns:
            The column comparison: schema drifts (added / removed / type
            changed) and, when the column exists on both sides, whatever the
            configured detectors report.
        """
        import logging

        drifts: list[DriftResult] = []

        # Check for schema changes
        if old_col is None:
            # New column added
            drifts.append(DriftResult(
                drift_type=DriftType.SCHEMA,
                severity=DriftSeverity.WARNING,
                column=name,
                metric="column",
                old_value=None,
                new_value=new_col.physical_type if new_col else None,
                message=f"New column added: {name}",
            ))
        elif new_col is None:
            # Column removed
            drifts.append(DriftResult(
                drift_type=DriftType.SCHEMA,
                severity=DriftSeverity.CRITICAL,
                column=name,
                metric="column",
                old_value=old_col.physical_type,
                new_value=None,
                message=f"Column removed: {name}",
            ))
        else:
            # Both exist - check for type change
            if old_col.physical_type != new_col.physical_type:
                drifts.append(DriftResult(
                    drift_type=DriftType.SCHEMA,
                    severity=DriftSeverity.CRITICAL,
                    column=name,
                    metric="physical_type",
                    old_value=old_col.physical_type,
                    new_value=new_col.physical_type,
                    message=f"Type changed from {old_col.physical_type} to {new_col.physical_type}",
                ))

            # Run all detectors
            for detector in self.detectors:
                try:
                    drifts.extend(
                        detector.detect(old_col, new_col, self.thresholds)
                    )
                except Exception:
                    # Detection stays best-effort: one broken detector must
                    # not abort the comparison. The failure is logged rather
                    # than swallowed so missing drift results can be traced.
                    logging.getLogger(__name__).warning(
                        "Drift detector %r failed on column %r; skipping it",
                        getattr(detector, "name", type(detector).__name__),
                        name,
                        exc_info=True,
                    )

        return ColumnComparison(
            column_name=name,
            exists_in_old=old_col is not None,
            exists_in_new=new_col is not None,
            drifts=tuple(drifts),
        )

    def _check_table_drift(
        self,
        old_profile: TableProfile,
        new_profile: TableProfile,
    ) -> list[DriftResult]:
        """Check for table-level drift.

        Args:
            old_profile: Previous/baseline profile
            new_profile: Current profile

        Returns:
            Drifts for a large relative row-count change (WARNING) and for
            any column-count change (INFO).
        """
        drifts: list[DriftResult] = []

        # Row count change (a relative change needs a non-zero baseline)
        if old_profile.row_count > 0:
            row_change = abs(new_profile.row_count - old_profile.row_count) / old_profile.row_count
            if row_change >= self._ROW_COUNT_WARNING_RATIO:
                direction = (
                    ChangeDirection.INCREASED if new_profile.row_count > old_profile.row_count
                    else ChangeDirection.DECREASED
                )
                drifts.append(DriftResult(
                    drift_type=DriftType.CARDINALITY,
                    severity=DriftSeverity.WARNING,
                    column=None,
                    metric="row_count",
                    old_value=old_profile.row_count,
                    new_value=new_profile.row_count,
                    change_ratio=row_change,
                    direction=direction,
                    message=f"Row count {direction.value} by {row_change:.1%}",
                ))

        # Column count change
        if old_profile.column_count != new_profile.column_count:
            direction = (
                ChangeDirection.INCREASED if new_profile.column_count > old_profile.column_count
                else ChangeDirection.DECREASED
            )
            drifts.append(DriftResult(
                drift_type=DriftType.SCHEMA,
                severity=DriftSeverity.INFO,
                column=None,
                metric="column_count",
                old_value=old_profile.column_count,
                new_value=new_profile.column_count,
                direction=direction,
                message=f"Column count {direction.value} from {old_profile.column_count} to {new_profile.column_count}",
            ))

        return drifts
941
+
942
+
943
+ # =============================================================================
944
+ # Convenience Functions
945
+ # =============================================================================
946
+
947
+
948
def compare_profiles(
    old: TableProfile,
    new: TableProfile,
    *,
    thresholds: DriftThresholds | None = None,
) -> ProfileComparison:
    """Run a one-off drift comparison between two table profiles.

    Shorthand for building a ``ProfileComparator`` with the given thresholds
    and calling its ``compare`` method.

    Args:
        old: Previous/baseline profile.
        new: Current profile.
        thresholds: Detection thresholds, passed to ``ProfileComparator``
            unchanged (``None`` included).

    Returns:
        The comparison result produced by ``ProfileComparator.compare``.

    Example:
        from truthound.profiler import profile_file, compare_profiles

        old_profile = profile_file("data_v1.parquet")
        new_profile = profile_file("data_v2.parquet")

        comparison = compare_profiles(old_profile, new_profile)

        if comparison.has_drift:
            print(comparison.to_report())
    """
    return ProfileComparator(thresholds=thresholds).compare(old, new)
977
+
978
+
979
def detect_drift(
    old: TableProfile,
    new: TableProfile,
    *,
    min_severity: DriftSeverity = DriftSeverity.WARNING,
    thresholds: DriftThresholds | None = None,
) -> list[DriftResult]:
    """Detect drifts at or above a minimum severity.

    Args:
        old: Previous profile
        new: Current profile
        min_severity: Minimum severity to return
        thresholds: Detection thresholds forwarded to ``compare_profiles``.
            ``None`` (the default) keeps the previous behavior.

    Returns:
        The drifts from ``comparison.all_drifts`` whose severity ranks at or
        above ``min_severity`` (INFO < WARNING < CRITICAL).
    """
    comparison = compare_profiles(old, new, thresholds=thresholds)

    # Explicit ranking so severities can be compared numerically.
    severity_order = {
        DriftSeverity.INFO: 0,
        DriftSeverity.WARNING: 1,
        DriftSeverity.CRITICAL: 2,
    }
    min_level = severity_order[min_severity]

    return [
        d for d in comparison.all_drifts
        if severity_order[d.severity] >= min_level
    ]