truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877)
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
truthound/cli.py ADDED
@@ -0,0 +1,2358 @@
1
+ """Command-line interface for Truthound."""
2
+
3
+ import json
4
+ from pathlib import Path
5
+ from typing import Annotated, Optional
6
+
7
+ import typer
8
+
9
+ from truthound.api import check, mask, profile, scan
10
+ from truthound.drift import compare
11
+ from truthound.schema import learn
12
+
13
+ # Phase 7: Auto-profiling imports (lazy loaded to avoid startup overhead)
14
+
15
def _version_callback(value: bool) -> None:
    """Echo the installed truthound version and stop when ``value`` is set.

    Used as the eager callback of the ``--version`` / ``-V`` option.
    Returns without doing anything when the flag was not given.

    Raises:
        typer.Exit: After the version line has been echoed.
    """
    if not value:
        return

    # Imported inside the callback rather than at module level.
    from truthound import __version__

    typer.echo(f"truthound {__version__}")
    raise typer.Exit()
21
+
22
+
23
# Root Typer application. The commands and sub-applications defined in this
# module are registered on this object.
app = typer.Typer(
    add_completion=False,
    help="Zero-Configuration Data Quality Framework Powered by Polars",
    name="truthound",
)
28
+
29
+
30
# Application-level callback. Its only job is to declare the eager
# --version / -V option; the work is done by _version_callback.
@app.callback()
def main(
    version: Annotated[
        bool,
        typer.Option(
            "--version",
            "-V",
            help="Show version and exit.",
            is_eager=True,
            callback=_version_callback,
        ),
    ] = False,
) -> None:
    """Truthound - Zero-Configuration Data Quality Framework."""
    return None
45
+
46
# "checkpoint" sub-application: checkpoint and CI/CD integration commands.
checkpoint_app = typer.Typer(
    help="Checkpoint and CI/CD integration commands",
    name="checkpoint",
)
app.add_typer(checkpoint_app, name="checkpoint")

# Phase 9: plugin management commands, mounted under "plugin".
from truthound.plugins.cli import app as plugin_app

app.add_typer(plugin_app, name="plugin")

# Scaffolding commands (th new validator, th new reporter, th new plugin),
# mounted under "new".
from truthound.cli_modules.scaffolding.commands import app as new_app

app.add_typer(new_app, name="new")
60
+
61
+
62
# `truthound learn FILE [-o schema.yaml] [--no-constraints]`
# Learns a schema from FILE via truthound.schema.learn, saves it to
# `output`, and echoes a short summary. Exits with code 1 when FILE is
# missing or when learning/saving raises.
@app.command(name="learn")
def learn_cmd(
    file: Annotated[Path, typer.Argument(help="Path to the data file to learn from")],
    output: Annotated[
        Path, typer.Option("--output", "-o", help="Output schema file path")
    ] = Path("schema.yaml"),
    no_constraints: Annotated[
        bool, typer.Option("--no-constraints", help="Don't infer constraints from data")
    ] = False,
) -> None:
    """Learn schema from a data file."""
    source_missing = not file.exists()
    if source_missing:
        typer.echo(f"Error: File not found: {file}", err=True)
        raise typer.Exit(1)

    # The CLI flag is negative ("--no-constraints"); the API argument is positive.
    infer = not no_constraints

    try:
        learned = learn(str(file), infer_constraints=infer)
        learned.save(output)
        typer.echo(f"Schema saved to {output}")
        typer.echo(f" Columns: {len(learned.columns)}")
        typer.echo(f" Rows: {learned.row_count:,}")
    except Exception as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1)
88
+
89
+
90
# `truthound check FILE [options]`
# Runs truthound.api.check on FILE and renders the report as console output,
# JSON or HTML. Exits with code 1 when an input file is missing, when the
# check raises, when HTML is requested without --output or without the HTML
# report module being importable, or (with --strict) when the report has issues.
@app.command(name="check")
def check_cmd(
    file: Annotated[Path, typer.Argument(help="Path to the data file")],
    validators: Annotated[
        Optional[list[str]],
        typer.Option("--validators", "-v", help="Comma-separated list of validators"),
    ] = None,
    min_severity: Annotated[
        Optional[str],
        typer.Option("--min-severity", "-s", help="Minimum severity level (low, medium, high, critical)"),
    ] = None,
    schema_file: Annotated[
        Optional[Path],
        typer.Option("--schema", help="Schema file for validation"),
    ] = None,
    auto_schema: Annotated[
        bool,
        typer.Option("--auto-schema", help="Auto-learn and cache schema (zero-config mode)"),
    ] = False,
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json, html)"),
    ] = "console",
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file path"),
    ] = None,
    strict: Annotated[
        bool,
        typer.Option("--strict", help="Exit with code 1 if issues are found"),
    ] = False,
) -> None:
    """Validate data quality in a file."""
    if not file.exists():
        typer.echo(f"Error: File not found: {file}", err=True)
        raise typer.Exit(1)

    if schema_file and not schema_file.exists():
        typer.echo(f"Error: Schema file not found: {schema_file}", err=True)
        raise typer.Exit(1)

    # The option may be repeated and each occurrence may itself be a
    # comma-separated list, so flatten everything into one list of names.
    # Blank entries (e.g. from a trailing comma) are dropped instead of being
    # forwarded as an empty validator name; if nothing is left, fall back to
    # None exactly as if the option had not been given.
    validator_list = None
    if validators:
        names = (part.strip() for part in ",".join(validators).split(","))
        validator_list = [name for name in names if name] or None

    try:
        report = check(
            str(file),
            validators=validator_list,
            min_severity=min_severity,
            schema=schema_file,
            auto_schema=auto_schema,
        )
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)

    # Output the report
    if format == "json":
        result = report.to_json()
        if output:
            # Write UTF-8 explicitly; the platform default encoding may not
            # be able to represent every character in the report.
            output.write_text(result, encoding="utf-8")
            typer.echo(f"Report written to {output}")
        else:
            typer.echo(result)
    elif format == "html":
        if not output:
            typer.echo("Error: --output is required for HTML format", err=True)
            raise typer.Exit(1)
        # HTML output requires jinja2
        try:
            from truthound.html_report import generate_html_report

            html = generate_html_report(report)
            output.write_text(html, encoding="utf-8")
            typer.echo(f"HTML report written to {output}")
        except ImportError:
            typer.echo("Error: HTML reports require jinja2. Install with: pip install truthound[reports]", err=True)
            raise typer.Exit(1)
    else:
        # Any other format value falls back to the console rendering.
        report.print()

    # Exit with error if strict mode and issues found
    if strict and report.has_issues:
        raise typer.Exit(1)
176
+
177
+
178
# `truthound scan FILE [-f console|json] [-o PATH]`
# Runs truthound.api.scan on FILE and prints the resulting report, or emits
# it as JSON (to --output when given, otherwise to stdout). Exits with
# code 1 when FILE is missing or the scan raises.
@app.command(name="scan")
def scan_cmd(
    file: Annotated[Path, typer.Argument(help="Path to the data file")],
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json)"),
    ] = "console",
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file path"),
    ] = None,
) -> None:
    """Scan for personally identifiable information (PII)."""
    if not file.exists():
        typer.echo(f"Error: File not found: {file}", err=True)
        raise typer.Exit(1)

    try:
        pii_report = scan(str(file))
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)

    if format == "json":
        result = pii_report.to_json()
        if output:
            # Write UTF-8 explicitly rather than the platform default encoding.
            output.write_text(result, encoding="utf-8")
            typer.echo(f"Report written to {output}")
        else:
            typer.echo(result)
    else:
        # Any non-"json" format value uses the console rendering.
        pii_report.print()
210
+
211
+
212
# `truthound mask FILE -o OUT [-c COLS] [-s STRATEGY]`
# Runs truthound.api.mask on FILE and writes the returned frame to OUT,
# choosing the writer from OUT's extension (.csv, .parquet, .json; anything
# else is written as CSV). Exits with code 1 when FILE is missing, when
# masking raises, or when the output cannot be written.
@app.command(name="mask")
def mask_cmd(
    file: Annotated[Path, typer.Argument(help="Path to the data file")],
    output: Annotated[
        Path,
        typer.Option("--output", "-o", help="Output file path"),
    ],
    columns: Annotated[
        Optional[list[str]],
        typer.Option("--columns", "-c", help="Columns to mask (comma-separated)"),
    ] = None,
    strategy: Annotated[
        str,
        typer.Option("--strategy", "-s", help="Masking strategy (redact, hash, fake)"),
    ] = "redact",
) -> None:
    """Mask sensitive data in a file."""
    if not file.exists():
        typer.echo(f"Error: File not found: {file}", err=True)
        raise typer.Exit(1)

    # The option may be repeated and each occurrence may be comma-separated.
    # Blank entries (e.g. from a trailing comma) are dropped instead of being
    # forwarded as an empty column name; if nothing is left, fall back to
    # None exactly as if the option had not been given.
    column_list = None
    if columns:
        names = (part.strip() for part in ",".join(columns).split(","))
        column_list = [name for name in names if name] or None

    try:
        masked_df = mask(str(file), columns=column_list, strategy=strategy)
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)

    # Write output based on file extension. Write failures are reported the
    # same way as masking failures instead of surfacing a raw traceback.
    suffix = output.suffix.lower()
    try:
        if suffix == ".parquet":
            masked_df.write_parquet(output)
        elif suffix == ".json":
            masked_df.write_json(output)
        else:
            # ".csv" and every unrecognised extension are written as CSV.
            masked_df.write_csv(output)
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)

    typer.echo(f"Masked data written to {output}")
256
+
257
+
258
# `truthound profile FILE [-f console|json] [-o PATH]`
# Runs truthound.api.profile on FILE and prints the resulting report, or
# emits it as JSON (to --output when given, otherwise to stdout). Exits with
# code 1 when FILE is missing or profiling raises.
@app.command(name="profile")
def profile_cmd(
    file: Annotated[Path, typer.Argument(help="Path to the data file")],
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json)"),
    ] = "console",
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file path"),
    ] = None,
) -> None:
    """Generate a statistical profile of the data."""
    if not file.exists():
        typer.echo(f"Error: File not found: {file}", err=True)
        raise typer.Exit(1)

    try:
        profile_report = profile(str(file))
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)

    if format == "json":
        result = profile_report.to_json()
        if output:
            # Write UTF-8 explicitly rather than the platform default encoding.
            output.write_text(result, encoding="utf-8")
            typer.echo(f"Profile written to {output}")
        else:
            typer.echo(result)
    else:
        # Any non-"json" format value uses the console rendering.
        profile_report.print()
290
+
291
+
292
# `truthound compare BASELINE CURRENT [options]`
# Runs truthound.drift.compare on the two files and prints the drift report,
# or emits it as JSON (to --output when given, otherwise to stdout). Exits
# with code 1 when either file is missing, when the comparison raises, or
# (with --strict) when the report says drift was found.
@app.command(name="compare")
def compare_cmd(
    baseline: Annotated[Path, typer.Argument(help="Baseline (reference) data file")],
    current: Annotated[Path, typer.Argument(help="Current data file to compare")],
    columns: Annotated[
        Optional[list[str]],
        typer.Option("--columns", "-c", help="Columns to compare (comma-separated)"),
    ] = None,
    method: Annotated[
        str,
        typer.Option("--method", "-m", help="Detection method (auto, ks, psi, chi2, js)"),
    ] = "auto",
    threshold: Annotated[
        Optional[float],
        typer.Option("--threshold", "-t", help="Custom drift threshold"),
    ] = None,
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json)"),
    ] = "console",
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file path"),
    ] = None,
    strict: Annotated[
        bool,
        typer.Option("--strict", help="Exit with code 1 if drift is detected"),
    ] = False,
) -> None:
    """Compare two datasets and detect data drift."""
    if not baseline.exists():
        typer.echo(f"Error: Baseline file not found: {baseline}", err=True)
        raise typer.Exit(1)

    if not current.exists():
        typer.echo(f"Error: Current file not found: {current}", err=True)
        raise typer.Exit(1)

    # The option may be repeated and each occurrence may be comma-separated.
    # Blank entries (e.g. from a trailing comma) are dropped instead of being
    # forwarded as an empty column name; if nothing is left, fall back to
    # None exactly as if the option had not been given.
    column_list = None
    if columns:
        names = (part.strip() for part in ",".join(columns).split(","))
        column_list = [name for name in names if name] or None

    try:
        drift_report = compare(
            str(baseline),
            str(current),
            columns=column_list,
            method=method,
            threshold=threshold,
        )
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)

    if format == "json":
        result = drift_report.to_json()
        if output:
            # Write UTF-8 explicitly rather than the platform default encoding.
            output.write_text(result, encoding="utf-8")
            typer.echo(f"Drift report written to {output}")
        else:
            typer.echo(result)
    else:
        # Any non-"json" format value uses the console rendering.
        drift_report.print()

    # Exit with error if strict mode and drift found
    if strict and drift_report.has_drift:
        raise typer.Exit(1)
360
+
361
+
362
+ # =============================================================================
363
+ # Checkpoint Commands
364
+ # =============================================================================
365
+
366
+
367
# `truthound checkpoint run NAME [options]`
# Either loads checkpoint NAME from --config (YAML or JSON, chosen by file
# extension) or builds an ad-hoc checkpoint from --data plus the validator
# and action flags, runs it, and prints a summary or JSON result. Exits with
# code 1 on missing files, an unknown checkpoint name, any exception, or
# (with --strict) a "failure"/"error" result status.
@checkpoint_app.command(name="run")
def checkpoint_run_cmd(
    name: Annotated[str, typer.Argument(help="Name of checkpoint to run")],
    config_file: Annotated[
        Optional[Path],
        typer.Option("--config", "-c", help="Checkpoint configuration file (YAML/JSON)"),
    ] = None,
    data_source: Annotated[
        Optional[Path],
        typer.Option("--data", "-d", help="Override data source path"),
    ] = None,
    validators: Annotated[
        Optional[list[str]],
        typer.Option("--validators", "-v", help="Override validators (comma-separated)"),
    ] = None,
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file for results (JSON)"),
    ] = None,
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json)"),
    ] = "console",
    strict: Annotated[
        bool,
        typer.Option("--strict", help="Exit with code 1 if issues are found"),
    ] = False,
    store_result: Annotated[
        Optional[Path],
        typer.Option("--store", help="Store results to directory"),
    ] = None,
    notify_slack: Annotated[
        Optional[str],
        typer.Option("--slack", help="Slack webhook URL for notifications"),
    ] = None,
    notify_webhook: Annotated[
        Optional[str],
        typer.Option("--webhook", help="Webhook URL for notifications"),
    ] = None,
    github_summary: Annotated[
        bool,
        typer.Option("--github-summary", help="Write GitHub Actions job summary"),
    ] = False,
) -> None:
    """Run a checkpoint validation pipeline."""
    from truthound.checkpoint import Checkpoint, CheckpointRegistry
    from truthound.checkpoint.actions import (
        StoreValidationResult,
        SlackNotification,
        WebhookAction,
        GitHubAction,
    )

    try:
        # Load from config file or create ad-hoc
        if config_file:
            if not config_file.exists():
                typer.echo(f"Error: Config file not found: {config_file}", err=True)
                raise typer.Exit(1)

            registry = CheckpointRegistry()
            # Pick the loader from the extension, case-insensitively.
            if config_file.suffix.lower() in (".yaml", ".yml"):
                registry.load_from_yaml(config_file)
            else:
                registry.load_from_json(config_file)

            if name not in registry:
                typer.echo(f"Error: Checkpoint '{name}' not found in config", err=True)
                # Part of the same diagnostic, so it goes to stderr as well.
                typer.echo(f"Available: {', '.join(registry.list_names())}", err=True)
                raise typer.Exit(1)

            checkpoint = registry.get(name)

            # The ad-hoc options below are only consumed when no config file
            # is given. Say so instead of silently dropping them.
            ignored = [
                flag
                for flag, given in (
                    ("--data", data_source),
                    ("--validators", validators),
                    ("--store", store_result),
                    ("--slack", notify_slack),
                    ("--webhook", notify_webhook),
                    ("--github-summary", github_summary),
                )
                if given
            ]
            if ignored:
                typer.echo(
                    f"Warning: {', '.join(ignored)} ignored because the checkpoint "
                    "was loaded from --config",
                    err=True,
                )
        else:
            # Create ad-hoc checkpoint
            if not data_source:
                typer.echo("Error: --data is required when not using config file", err=True)
                raise typer.Exit(1)

            if not data_source.exists():
                typer.echo(f"Error: Data file not found: {data_source}", err=True)
                raise typer.Exit(1)

            # Flatten repeated / comma-separated values and drop blank
            # entries; an all-blank value behaves like an omitted option.
            validator_list = None
            if validators:
                names = (part.strip() for part in ",".join(validators).split(","))
                validator_list = [entry for entry in names if entry] or None

            actions = []

            # Add actions based on CLI options
            if store_result:
                actions.append(StoreValidationResult(store_path=str(store_result)))

            if notify_slack:
                actions.append(
                    SlackNotification(
                        webhook_url=notify_slack,
                        notify_on="failure",
                    )
                )

            if notify_webhook:
                actions.append(WebhookAction(url=notify_webhook))

            if github_summary:
                actions.append(
                    GitHubAction(
                        set_summary=True,
                        set_output=True,
                    )
                )

            checkpoint = Checkpoint(
                name=name,
                data_source=str(data_source),
                validators=validator_list,
                actions=actions,
            )

        # Run checkpoint
        result = checkpoint.run()

        # Output results
        if format == "json":
            # default=str stringifies any value json cannot encode natively.
            result_json = json.dumps(result.to_dict(), indent=2, default=str)
            if output:
                # Write UTF-8 explicitly rather than the platform default encoding.
                output.write_text(result_json, encoding="utf-8")
                typer.echo(f"Results written to {output}")
            else:
                typer.echo(result_json)
        else:
            typer.echo(result.summary())

        # Exit code based on status
        if strict and result.status.value in ("failure", "error"):
            raise typer.Exit(1)

    except typer.Exit:
        # Deliberate exits raised above must not be turned into "Error: ..." output.
        raise
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)
501
+
502
+
503
@checkpoint_app.command(name="list")
def checkpoint_list_cmd(
    config_file: Annotated[
        Optional[Path],
        typer.Option("--config", "-c", help="Checkpoint configuration file"),
    ] = None,
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json)"),
    ] = "console",
) -> None:
    """List available checkpoints."""
    # Imported lazily, like the other checkpoint commands in this module.
    from truthound.checkpoint import CheckpointRegistry

    try:
        registry = CheckpointRegistry()

        # An optional config file populates the registry before listing.
        if config_file:
            if not config_file.exists():
                typer.echo(f"Error: Config file not found: {config_file}", err=True)
                raise typer.Exit(1)

            # YAML suffixes use the YAML loader; anything else is read as JSON.
            load = (
                registry.load_from_yaml
                if config_file.suffix in (".yaml", ".yml")
                else registry.load_from_json
            )
            load(config_file)

        registered = registry.list_all()
        if not registered:
            typer.echo("No checkpoints registered.")
            return

        if format == "json":
            typer.echo(json.dumps([item.to_dict() for item in registered], indent=2))
            return

        # Any other format value falls through to the console listing.
        typer.echo(f"Checkpoints ({len(registered)}):")
        for item in registered:
            typer.echo(f" - {item.name}")
            typer.echo(f" Data: {item.config.data_source}")
            typer.echo(f" Actions: {len(item.actions)}")
            typer.echo(f" Triggers: {len(item.triggers)}")

    except typer.Exit:
        # Deliberate exits must not be reported as generic errors below.
        raise
    except Exception as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1)
552
+
553
+
554
@checkpoint_app.command(name="validate")
def checkpoint_validate_cmd(
    config_file: Annotated[
        Path,
        typer.Argument(help="Checkpoint configuration file to validate"),
    ],
) -> None:
    """Validate a checkpoint configuration file."""
    from truthound.checkpoint import CheckpointRegistry

    try:
        if not config_file.exists():
            typer.echo(f"Error: Config file not found: {config_file}", err=True)
            raise typer.Exit(1)

        registry = CheckpointRegistry()

        # YAML suffixes use the YAML loader; everything else is read as JSON.
        if config_file.suffix in (".yaml", ".yml"):
            loaded = registry.load_from_yaml(config_file)
        else:
            loaded = registry.load_from_json(config_file)

        found_errors = False
        for checkpoint in loaded:
            problems = checkpoint.validate()
            if not problems:
                typer.echo(f"Checkpoint '{checkpoint.name}' is valid")
                continue

            found_errors = True
            typer.echo(f"Checkpoint '{checkpoint.name}' has errors:")
            for problem in problems:
                typer.echo(f" - {problem}")

        # Exit non-zero when any checkpoint reported a validation error.
        if found_errors:
            typer.echo("\nSome checkpoints have validation errors.", err=True)
            raise typer.Exit(1)

        typer.echo(f"\nAll {len(loaded)} checkpoint(s) are valid.")

    except typer.Exit:
        # Let deliberate exits propagate untouched.
        raise
    except Exception as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1)
599
+
600
+
601
@checkpoint_app.command(name="init")
def checkpoint_init_cmd(
    output: Annotated[
        Path,
        typer.Option("--output", "-o", help="Output file path"),
    ] = Path("truthound.yaml"),
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Config format (yaml, json)"),
    ] = "yaml",
) -> None:
    """Initialize a sample checkpoint configuration file."""
    # Two example checkpoints: one interval-scheduled with storage, docs and
    # Slack actions, and one cron-triggered with a webhook action.
    sample_config = {
        "checkpoints": [
            {
                "name": "daily_data_validation",
                "data_source": "data/production.csv",
                "validators": ["null", "duplicate", "range", "regex"],
                "min_severity": "medium",
                "auto_schema": True,
                "tags": {
                    "environment": "production",
                    "team": "data-platform",
                },
                "actions": [
                    {
                        "type": "store_result",
                        "store_path": "./truthound_results",
                        "partition_by": "date",
                    },
                    {
                        "type": "update_docs",
                        "site_path": "./truthound_docs",
                        "include_history": True,
                    },
                    {
                        "type": "slack",
                        "webhook_url": "https://hooks.slack.com/services/YOUR/WEBHOOK/URL",
                        "notify_on": "failure",
                        "channel": "#data-quality",
                    },
                ],
                "triggers": [
                    {
                        "type": "schedule",
                        "interval_hours": 24,
                        "run_on_weekdays": [0, 1, 2, 3, 4],  # Mon-Fri
                    },
                ],
            },
            {
                "name": "hourly_metrics_check",
                "data_source": "data/metrics.parquet",
                "validators": ["null", "range"],
                "actions": [
                    {
                        "type": "webhook",
                        "url": "https://api.example.com/data-quality/events",
                        "auth_type": "bearer",
                        "auth_credentials": {"token": "${API_TOKEN}"},
                    },
                ],
                "triggers": [
                    {
                        "type": "cron",
                        "expression": "0 * * * *",  # Every hour
                    },
                ],
            },
        ],
    }

    # The file suffix is always forced to match the chosen format.
    if format == "json":
        output = output.with_suffix(".json")
        output.write_text(json.dumps(sample_config, indent=2), encoding="utf-8")
    else:
        # PyYAML is imported only on the YAML path so that JSON output does
        # not require it to be installed.
        import yaml

        output = output.with_suffix(".yaml")
        output.write_text(
            yaml.dump(sample_config, default_flow_style=False, sort_keys=False),
            encoding="utf-8",
        )

    typer.echo(f"Sample checkpoint config created: {output}")
    typer.echo("\nEdit the file to configure your checkpoints, then run:")
    typer.echo(f" truthound checkpoint run <checkpoint_name> --config {output}")
686
+
687
+
688
+ # =============================================================================
689
+ # Auto-Profiling Commands (Phase 7)
690
+ # =============================================================================
691
+
692
+
693
@app.command(name="auto-profile")
def auto_profile_cmd(
    file: Annotated[Path, typer.Argument(help="Path to the data file")],
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file path for profile JSON"),
    ] = None,
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json, yaml)"),
    ] = "console",
    include_patterns: Annotated[
        bool,
        typer.Option("--patterns/--no-patterns", help="Include pattern detection"),
    ] = True,
    include_correlations: Annotated[
        bool,
        typer.Option("--correlations/--no-correlations", help="Include correlation analysis"),
    ] = False,
    sample_size: Annotated[
        Optional[int],
        typer.Option("--sample", "-s", help="Sample size for profiling (default: all rows)"),
    ] = None,
    top_n: Annotated[
        int,
        typer.Option("--top-n", help="Number of top/bottom values to include"),
    ] = 10,
) -> None:
    """Profile data with auto-detection of types and patterns.

    This performs comprehensive profiling including:
    - Column statistics (null ratio, unique ratio, distribution)
    - Type inference (email, phone, UUID, etc.)
    - Pattern detection
    - Suggested validation rules
    """
    if not file.exists():
        typer.echo(f"Error: File not found: {file}", err=True)
        raise typer.Exit(1)

    try:
        # Only the names actually used below are imported.
        from truthound.profiler import (
            DataProfiler,
            ProfilerConfig,
            save_profile,
        )

        config = ProfilerConfig(
            include_patterns=include_patterns,
            include_correlations=include_correlations,
            sample_size=sample_size,
            top_n_values=top_n,
        )
        profiler = DataProfiler(config=config)

        typer.echo(f"Profiling {file}...")
        profile_result = profiler.profile(
            _read_file_as_lazy(file),
            name=file.stem,
            source=str(file),
        )

        if format in ("json", "yaml"):
            # Serialize once, then share the write-or-print logic.
            if format == "json":
                import json as json_mod

                rendered = json_mod.dumps(
                    profile_result.to_dict(), indent=2, default=str
                )
            else:
                import yaml

                rendered = yaml.dump(
                    profile_result.to_dict(), default_flow_style=False
                )

            if output:
                output.write_text(rendered, encoding="utf-8")
                typer.echo(f"Profile saved to {output}")
            else:
                typer.echo(rendered)

        else:  # console (also used for any unrecognized format value)
            _print_profile_summary(profile_result)
            if output:
                save_profile(profile_result, output)
                typer.echo(f"\nFull profile saved to {output}")

    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1) from e
786
+
787
+
788
@app.command(name="generate-suite")
def generate_suite_cmd(
    profile_file: Annotated[
        Path,
        typer.Argument(help="Path to profile JSON file (from auto-profile)"),
    ],
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file path"),
    ] = None,
    format: Annotated[
        str,
        typer.Option(
            "--format", "-f",
            help="Output format (yaml, json, python, toml, checkpoint)",
        ),
    ] = "yaml",
    strictness: Annotated[
        str,
        typer.Option("--strictness", "-s", help="Rule strictness (loose, medium, strict)"),
    ] = "medium",
    include: Annotated[
        Optional[list[str]],
        typer.Option("--include", "-i", help="Include only these categories"),
    ] = None,
    exclude: Annotated[
        Optional[list[str]],
        typer.Option("--exclude", "-e", help="Exclude these categories"),
    ] = None,
    min_confidence: Annotated[
        Optional[str],
        typer.Option("--min-confidence", help="Minimum rule confidence (low, medium, high)"),
    ] = None,
    name: Annotated[
        Optional[str],
        typer.Option("--name", "-n", help="Name for the validation suite"),
    ] = None,
    preset: Annotated[
        Optional[str],
        typer.Option(
            "--preset", "-p",
            help="Configuration preset (default, strict, loose, minimal, comprehensive, ci_cd)",
        ),
    ] = None,
    config: Annotated[
        Optional[Path],
        typer.Option("--config", "-c", help="Path to configuration file"),
    ] = None,
    group_by_category: Annotated[
        bool,
        typer.Option("--group-by-category", help="Group rules by category in output"),
    ] = False,
    code_style: Annotated[
        str,
        typer.Option(
            "--code-style",
            help="Python code style (functional, class_based, declarative)",
        ),
    ] = "functional",
) -> None:
    """Generate validation rules from a profile.

    This creates a validation suite based on the data profile.
    Categories available: schema, completeness, uniqueness, format,
    distribution, pattern, temporal, relationship, anomaly

    Output formats:
    - yaml: Human-readable YAML (default)
    - json: Machine-readable JSON
    - python: Executable Python code
    - toml: TOML configuration
    - checkpoint: Truthound checkpoint format for CI/CD

    Examples:
        # Generate from profile
        truthound generate-suite profile.json -o rules.yaml

        # Only schema and format rules
        truthound generate-suite profile.json -i schema -i format

        # Strict mode with preset
        truthound generate-suite profile.json --preset strict

        # Generate Python code with class-based style
        truthound generate-suite profile.json -f python --code-style class_based

        # Generate CI/CD checkpoint
        truthound generate-suite profile.json -f checkpoint -o ci_rules.yaml

        # Use configuration file
        truthound generate-suite profile.json --config suite_config.yaml
    """
    if not profile_file.exists():
        typer.echo(f"Error: Profile file not found: {profile_file}", err=True)
        raise typer.Exit(1)

    try:
        from truthound.profiler import (
            run_generate_suite,
            get_available_formats,
            get_available_presets,
        )

        # Validate format
        available_formats = get_available_formats()
        if format not in available_formats:
            typer.echo(
                f"Error: Invalid format '{format}'. "
                f"Available: {', '.join(available_formats)}",
                err=True,
            )
            raise typer.Exit(1)

        # Validate preset
        if preset:
            available_presets = get_available_presets()
            if preset not in available_presets:
                typer.echo(
                    f"Error: Invalid preset '{preset}'. "
                    f"Available: {', '.join(available_presets)}",
                    err=True,
                )
                raise typer.Exit(1)

        # Parse categories. Each option may be repeated and/or comma-separated.
        # Blank tokens (e.g. from a trailing comma in "schema,") are dropped so
        # they are never passed on as an empty category name; if nothing
        # remains, the filter is treated as not given.
        include_cats = None
        if include:
            include_cats = [
                token.strip()
                for value in include
                for token in value.split(",")
                if token.strip()
            ] or None

        exclude_cats = None
        if exclude:
            exclude_cats = [
                token.strip()
                for value in exclude
                for token in value.split(",")
                if token.strip()
            ] or None

        # NOTE(review): `code_style` is accepted on the command line but is not
        # forwarded to run_generate_suite here - confirm whether the handler
        # takes a matching keyword and pass it through if so.
        exit_code = run_generate_suite(
            profile_file=profile_file,
            output=output,
            format=format,
            strictness=strictness,
            include=include_cats,
            exclude=exclude_cats,
            min_confidence=min_confidence,
            name=name,
            preset=preset,
            config=config,
            group_by_category=group_by_category,
            echo=typer.echo,
            verbose=True,
        )

        # Propagate the handler's non-zero status as the process exit code.
        if exit_code != 0:
            raise typer.Exit(exit_code)

    except typer.Exit:
        raise
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1) from e
946
+
947
+
948
@app.command(name="quick-suite")
def quick_suite_cmd(
    file: Annotated[Path, typer.Argument(help="Path to the data file")],
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file path"),
    ] = None,
    format: Annotated[
        str,
        typer.Option(
            "--format", "-f",
            help="Output format (yaml, json, python, toml, checkpoint)",
        ),
    ] = "yaml",
    strictness: Annotated[
        str,
        typer.Option("--strictness", "-s", help="Rule strictness (loose, medium, strict)"),
    ] = "medium",
    include: Annotated[
        Optional[list[str]],
        typer.Option("--include", "-i", help="Include only these categories"),
    ] = None,
    exclude: Annotated[
        Optional[list[str]],
        typer.Option("--exclude", "-e", help="Exclude these categories"),
    ] = None,
    min_confidence: Annotated[
        Optional[str],
        typer.Option("--min-confidence", help="Minimum rule confidence (low, medium, high)"),
    ] = None,
    name: Annotated[
        Optional[str],
        typer.Option("--name", "-n", help="Name for the validation suite"),
    ] = None,
    preset: Annotated[
        Optional[str],
        typer.Option(
            "--preset", "-p",
            help="Configuration preset (default, strict, loose, minimal, comprehensive, ci_cd)",
        ),
    ] = None,
    sample_size: Annotated[
        Optional[int],
        typer.Option("--sample-size", help="Sample size for profiling (default: auto)"),
    ] = None,
) -> None:
    """Profile data and generate validation rules in one step.

    This is a convenience command that combines auto-profile and generate-suite.

    Output formats:
    - yaml: Human-readable YAML (default)
    - json: Machine-readable JSON
    - python: Executable Python code
    - toml: TOML configuration
    - checkpoint: Truthound checkpoint format for CI/CD

    Examples:
        # Basic usage
        truthound quick-suite data.parquet -o rules.yaml

        # Strict mode with Python output
        truthound quick-suite data.csv -s strict -f python -o validators.py

        # CI/CD checkpoint
        truthound quick-suite data.parquet --preset ci_cd -o ci_rules.yaml

        # With sampling for large files
        truthound quick-suite large_data.parquet --sample-size 10000
    """
    if not file.exists():
        typer.echo(f"Error: File not found: {file}", err=True)
        raise typer.Exit(1)

    try:
        from truthound.profiler import (
            run_quick_suite,
            get_available_formats,
            get_available_presets,
        )

        # Validate format
        available_formats = get_available_formats()
        if format not in available_formats:
            typer.echo(
                f"Error: Invalid format '{format}'. "
                f"Available: {', '.join(available_formats)}",
                err=True,
            )
            raise typer.Exit(1)

        # Validate preset
        if preset:
            available_presets = get_available_presets()
            if preset not in available_presets:
                typer.echo(
                    f"Error: Invalid preset '{preset}'. "
                    f"Available: {', '.join(available_presets)}",
                    err=True,
                )
                raise typer.Exit(1)

        # Parse categories. Each option may be repeated and/or comma-separated.
        # Blank tokens (e.g. from a trailing comma in "schema,") are dropped so
        # they are never passed on as an empty category name; if nothing
        # remains, the filter is treated as not given.
        include_cats = None
        if include:
            include_cats = [
                token.strip()
                for value in include
                for token in value.split(",")
                if token.strip()
            ] or None

        exclude_cats = None
        if exclude:
            exclude_cats = [
                token.strip()
                for value in exclude
                for token in value.split(",")
                if token.strip()
            ] or None

        # Run quick suite using the profiler handler
        exit_code = run_quick_suite(
            file=file,
            output=output,
            format=format,
            strictness=strictness,
            include=include_cats,
            exclude=exclude_cats,
            min_confidence=min_confidence,
            name=name,
            preset=preset,
            sample_size=sample_size,
            echo=typer.echo,
            verbose=True,
        )

        # Propagate the handler's non-zero status as the process exit code.
        if exit_code != 0:
            raise typer.Exit(exit_code)

    except typer.Exit:
        raise
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1) from e
1083
+
1084
+
1085
@app.command(name="list-formats")
def list_formats_cmd() -> None:
    """List available output formats for suite generation."""
    # Descriptions shown next to each format; formats missing from this table
    # are printed with an empty description.
    descriptions = {
        "yaml": "Human-readable YAML format (default)",
        "json": "Machine-readable JSON format",
        "python": "Executable Python code with validators",
        "toml": "TOML configuration format",
        "checkpoint": "Truthound checkpoint format for CI/CD",
    }

    try:
        from truthound.profiler import get_available_formats

        typer.echo("Available output formats:")
        typer.echo("")

        for format_name in get_available_formats():
            summary = descriptions.get(format_name, "")
            typer.echo(f" {format_name:12} - {summary}")

    except Exception as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1)
1108
+
1109
+
1110
@app.command(name="list-presets")
def list_presets_cmd() -> None:
    """List available configuration presets for suite generation."""
    # Descriptions shown next to each preset; presets missing from this table
    # are printed with an empty description.
    descriptions = {
        "default": "Balanced settings (medium strictness, all categories)",
        "strict": "Strict validation rules with high confidence",
        "loose": "Relaxed validation for flexible data",
        "minimal": "Only high-confidence schema rules",
        "comprehensive": "All generators with detailed output",
        "schema_only": "Schema and completeness rules only",
        "format_only": "Format and pattern rules only",
        "ci_cd": "Optimized for CI/CD pipelines (checkpoint format)",
        "development": "Development-friendly (Python code output)",
        "production": "Production-ready (strict, high confidence)",
    }

    try:
        from truthound.profiler import get_available_presets

        typer.echo("Available configuration presets:")
        typer.echo("")

        for preset_name in get_available_presets():
            summary = descriptions.get(preset_name, "")
            typer.echo(f" {preset_name:16} - {summary}")

    except Exception as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1)
1138
+
1139
+
1140
@app.command(name="list-categories")
def list_categories_cmd() -> None:
    """List available rule categories for suite generation."""
    # Descriptions shown next to each category; categories missing from this
    # table are printed with an empty description.
    descriptions = {
        "schema": "Column existence, types, and structure",
        "completeness": "Null values and data completeness",
        "uniqueness": "Unique constraints and cardinality",
        "format": "Data format validation (email, phone, etc.)",
        "distribution": "Statistical distribution checks",
        "pattern": "Regex pattern matching",
        "temporal": "Date/time validation",
        "relationship": "Cross-column relationships",
        "anomaly": "Anomaly detection rules",
    }

    try:
        from truthound.profiler import get_available_categories

        typer.echo("Available rule categories:")
        typer.echo("")

        for category_name in get_available_categories():
            summary = descriptions.get(category_name, "")
            typer.echo(f" {category_name:14} - {summary}")

    except Exception as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1)
1167
+
1168
+
1169
+ # =============================================================================
1170
+ # Helper Functions
1171
+ # =============================================================================
1172
+
1173
+
1174
+ def _read_file_as_lazy(path: Path):
1175
+ """Read a file as a Polars LazyFrame."""
1176
+ import polars as pl
1177
+
1178
+ suffix = path.suffix.lower()
1179
+ readers = {
1180
+ ".parquet": pl.scan_parquet,
1181
+ ".csv": pl.scan_csv,
1182
+ ".json": pl.scan_ndjson,
1183
+ ".ndjson": pl.scan_ndjson,
1184
+ }
1185
+
1186
+ if suffix not in readers:
1187
+ raise ValueError(
1188
+ f"Unsupported file type: {suffix}. "
1189
+ f"Supported: {list(readers.keys())}"
1190
+ )
1191
+
1192
+ return readers[suffix](path)
1193
+
1194
+
1195
+ # =============================================================================
1196
+ # Benchmark Commands
1197
+ # =============================================================================
1198
+
1199
# Sub-application holding the `benchmark ...` commands, mounted on the main app.
benchmark_app = typer.Typer(name="benchmark", help="Performance benchmarking commands")
app.add_typer(benchmark_app, name="benchmark")
1204
+
1205
+
1206
@benchmark_app.command(name="run")
def benchmark_run_cmd(
    benchmark: Annotated[
        Optional[str],
        typer.Argument(help="Benchmark name to run (e.g., 'profile', 'check')"),
    ] = None,
    suite: Annotated[
        Optional[str],
        typer.Option("--suite", "-s", help="Predefined suite to run (quick, ci, full, profiling, validation)"),
    ] = None,
    size: Annotated[
        str,
        typer.Option("--size", help="Data size (tiny, small, medium, large, xlarge)"),
    ] = "medium",
    rows: Annotated[
        Optional[int],
        typer.Option("--rows", "-r", help="Custom row count (overrides size)"),
    ] = None,
    iterations: Annotated[
        int,
        typer.Option("--iterations", "-i", help="Number of measurement iterations"),
    ] = 5,
    warmup: Annotated[
        int,
        typer.Option("--warmup", "-w", help="Number of warmup iterations"),
    ] = 2,
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file path"),
    ] = None,
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json, markdown, html)"),
    ] = "console",
    save_baseline: Annotated[
        bool,
        typer.Option("--save-baseline", help="Save results as baseline for regression detection"),
    ] = False,
    compare_baseline: Annotated[
        bool,
        typer.Option("--compare-baseline", help="Compare against saved baseline"),
    ] = False,
    verbose: Annotated[
        bool,
        typer.Option("--verbose", "-v", help="Verbose output"),
    ] = False,
) -> None:
    """Run performance benchmarks.

    Examples:
        # Run a single benchmark
        truthound benchmark run profile --size medium

        # Run a predefined suite
        truthound benchmark run --suite quick

        # Run with custom row count
        truthound benchmark run check --rows 1000000

        # Save as baseline
        truthound benchmark run --suite ci --save-baseline

        # Compare against baseline
        truthound benchmark run --suite ci --compare-baseline
    """
    from truthound.benchmark import (
        BenchmarkRunner,
        BenchmarkSuite,
        BenchmarkConfig,
        BenchmarkSize,
        RunnerConfig,
        ConsoleReporter,
        JSONReporter,
        MarkdownReporter,
        HTMLReporter,
        RegressionDetector,
    )

    try:
        # Determine row count. An unknown size is rejected instead of silently
        # running at the medium size, matching how unknown suites are handled.
        size_map = {
            "tiny": BenchmarkSize.TINY,
            "small": BenchmarkSize.SMALL,
            "medium": BenchmarkSize.MEDIUM,
            "large": BenchmarkSize.LARGE,
            "xlarge": BenchmarkSize.XLARGE,
        }
        if size not in size_map:
            typer.echo(f"Unknown size: {size}. Available: {list(size_map.keys())}", err=True)
            raise typer.Exit(1)
        benchmark_size = size_map[size]
        row_count = rows if rows else benchmark_size.row_count

        # Reporter factories: only the selected reporter is instantiated. The
        # format is validated before any benchmark runs so that a typo does not
        # cost a full run and then fall back to console output.
        reporter_factories = {
            "console": lambda: ConsoleReporter(use_colors=True),
            "json": lambda: JSONReporter(pretty=True),
            "markdown": MarkdownReporter,
            "html": HTMLReporter,
        }
        if format not in reporter_factories:
            typer.echo(
                f"Unknown format: {format}. Available: {list(reporter_factories.keys())}",
                err=True,
            )
            raise typer.Exit(1)

        # Configure benchmark
        benchmark_config = BenchmarkConfig(
            warmup_iterations=warmup,
            measure_iterations=iterations,
            default_size=benchmark_size,
            verbose=verbose,
        )

        runner_config = RunnerConfig(
            # No size override when an explicit row count was given.
            size_override=benchmark_size if not rows else None,
            verbose=verbose,
        )

        runner = BenchmarkRunner(
            config=runner_config,
            benchmark_config=benchmark_config,
        )

        # Determine what to run
        if suite:
            suite_map = {
                "quick": BenchmarkSuite.quick,
                "ci": BenchmarkSuite.ci,
                "full": lambda: BenchmarkSuite.full(benchmark_size),
                "profiling": lambda: BenchmarkSuite.profiling(benchmark_size),
                "validation": lambda: BenchmarkSuite.validation(benchmark_size),
            }
            if suite not in suite_map:
                typer.echo(f"Unknown suite: {suite}. Available: {list(suite_map.keys())}", err=True)
                raise typer.Exit(1)

            benchmark_suite = suite_map[suite]()
            results = runner.run_suite(benchmark_suite)

        elif benchmark:
            result = runner.run(benchmark, row_count=row_count)
            # Wrap single result in suite result for consistent handling
            from truthound.benchmark.base import EnvironmentInfo
            from truthound.benchmark.runner import SuiteResult

            results = SuiteResult(
                suite_name=f"single:{benchmark}",
                results=[result],
                environment=EnvironmentInfo.capture(),
            )
            results.completed_at = result.completed_at

        else:
            typer.echo("Specify either a benchmark name or --suite", err=True)
            raise typer.Exit(1)

        # Compare against baseline if requested. A detected regression exits
        # non-zero before any report is rendered or baseline saved.
        if compare_baseline:
            detector = RegressionDetector()
            report = detector.generate_report(results)
            typer.echo(report)

            regressions = detector.check(results)
            if regressions:
                typer.echo("\n⚠️ Performance regressions detected!", err=True)
                raise typer.Exit(1)

        # Generate output
        reporter = reporter_factories[format]()
        report_content = reporter.report_suite(results)

        if output:
            output.parent.mkdir(parents=True, exist_ok=True)
            # Explicit UTF-8 so the report does not depend on the platform
            # default encoding.
            output.write_text(report_content, encoding="utf-8")
            typer.echo(f"Results saved to: {output}")
        else:
            typer.echo(report_content)

        # Save baseline if requested
        if save_baseline:
            detector = RegressionDetector()
            detector.save_baseline(results)
            typer.echo(f"Baseline saved to: {detector.history_path}")

    except typer.Exit:
        raise
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        if verbose:
            import traceback

            typer.echo(traceback.format_exc(), err=True)
        raise typer.Exit(1) from e
1391
+
1392
+
1393
@benchmark_app.command(name="list")
def benchmark_list_cmd(
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json)"),
    ] = "console",
) -> None:
    """List available benchmarks."""
    from truthound.benchmark import benchmark_registry

    registered = benchmark_registry.list_all()

    if format == "json":
        payload = []
        for entry in registered:
            payload.append(
                {
                    "name": entry.name,
                    "category": entry.category.value,
                    "description": entry.description,
                }
            )
        typer.echo(json.dumps(payload, indent=2))
        return

    # Any other format value produces the console listing.
    typer.echo("\nAvailable Benchmarks:")
    typer.echo("=" * 60)

    # Group by category, keeping registry order within each group.
    grouped = {}
    for entry in registered:
        grouped.setdefault(entry.category.value, []).append(entry)

    for category in sorted(grouped):
        typer.echo(f"\n[{category.upper()}]")
        for entry in grouped[category]:
            typer.echo(f" {entry.name:20} - {entry.description}")
1429
+
1430
+
1431
@benchmark_app.command(name="compare")
def benchmark_compare_cmd(
    baseline: Annotated[
        Path,
        typer.Argument(help="Baseline results JSON file"),
    ],
    current: Annotated[
        Path,
        typer.Argument(help="Current results JSON file"),
    ],
    threshold: Annotated[
        float,
        typer.Option("--threshold", "-t", help="Regression threshold percentage"),
    ] = 10.0,
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json, markdown)"),
    ] = "console",
) -> None:
    """Compare two benchmark results for regressions."""
    # NOTE(review): `format` is accepted but not used below; output is always
    # the console layout - confirm whether json/markdown should be implemented.
    if not baseline.exists():
        typer.echo(f"Baseline file not found: {baseline}", err=True)
        raise typer.Exit(1)

    if not current.exists():
        typer.echo(f"Current file not found: {current}", err=True)
        raise typer.Exit(1)

    try:
        # Explicit UTF-8 so parsing does not depend on the platform default.
        baseline_data = json.loads(baseline.read_text(encoding="utf-8"))
        current_data = json.loads(current.read_text(encoding="utf-8"))

        # This is a simplified comparison working directly on the JSON
        # payloads - a full implementation would reconstruct SuiteResult
        # objects.
        typer.echo("\nBenchmark Comparison")
        typer.echo("=" * 60)
        typer.echo(f"Baseline: {baseline}")
        typer.echo(f"Current: {current}")
        typer.echo(f"Threshold: {threshold}%")
        typer.echo("-" * 60)

        # Index both result lists by benchmark name.
        baseline_results = {r["benchmark_name"]: r for r in baseline_data.get("results", [])}
        current_results = {r["benchmark_name"]: r for r in current_data.get("results", [])}

        regressions = []
        improvements = []

        for name, curr in current_results.items():
            # Benchmarks without a baseline counterpart cannot be compared.
            if name not in baseline_results:
                continue

            base = baseline_results[name]
            base_duration = base["metrics"]["timing"]["mean_seconds"]
            curr_duration = curr["metrics"]["timing"]["mean_seconds"]

            # A non-positive baseline would make the percentage undefined.
            if base_duration > 0:
                pct_change = ((curr_duration - base_duration) / base_duration) * 100

                if pct_change > threshold:
                    regressions.append((name, base_duration, curr_duration, pct_change))
                elif pct_change < -threshold:
                    improvements.append((name, base_duration, curr_duration, pct_change))

        if regressions:
            typer.echo("\n🔴 REGRESSIONS:")
            for name, base_d, curr_d, pct in regressions:
                typer.echo(f" {name}: {base_d:.3f}s -> {curr_d:.3f}s ({pct:+.1f}%)")

        if improvements:
            typer.echo("\n🟢 IMPROVEMENTS:")
            for name, base_d, curr_d, pct in improvements:
                typer.echo(f" {name}: {base_d:.3f}s -> {curr_d:.3f}s ({pct:+.1f}%)")

        if not regressions and not improvements:
            typer.echo("\n✅ No significant changes detected.")

        typer.echo("")

        # Exit non-zero when any regression exceeded the threshold.
        if regressions:
            raise typer.Exit(1)

    except typer.Exit:
        raise
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1) from e
1522
+
1523
+
1524
def _print_profile_summary(profile) -> None:
    """Write a human-readable overview of ``profile`` to the console.

    A header block (name, row and column counts, estimated memory, and the
    duplicate-row figures when the duplicate ratio is positive) is followed
    by one section per entry in ``profile.columns``.
    """
    rule = "=" * 60
    typer.echo(f"\n{rule}")
    typer.echo(f"Profile: {profile.name or 'unnamed'}")
    typer.echo(f"{rule}")
    typer.echo(f"Rows: {profile.row_count:,}")
    typer.echo(f"Columns: {profile.column_count}")
    # Bytes -> MiB for display.
    typer.echo(f"Estimated Memory: {profile.estimated_memory_bytes / 1024 / 1024:.2f} MB")

    if profile.duplicate_row_ratio > 0:
        typer.echo(
            f"Duplicate Rows: {profile.duplicate_row_count:,} "
            f"({profile.duplicate_row_ratio*100:.1f}%)"
        )

    typer.echo(f"\n{'Column Details':^60}")
    typer.echo("-" * 60)

    for column in profile.columns:
        typer.echo(f"\n{column.name}")
        typer.echo(f" Type: {column.physical_type} -> {column.inferred_type.value}")
        typer.echo(f" Nulls: {column.null_count:,} ({column.null_ratio*100:.1f}%)")
        typer.echo(f" Unique: {column.distinct_count:,} ({column.unique_ratio*100:.1f}%)")

        # Distribution statistics are optional per column.
        if column.distribution:
            stats = column.distribution
            typer.echo(f" Range: [{stats.min}, {stats.max}]")
            if stats.mean is not None:
                typer.echo(f" Mean: {stats.mean:.2f}, Std: {stats.std:.2f}")

        # Length statistics are only printed when a minimum length is set.
        if column.min_length is not None:
            typer.echo(
                f" Length: [{column.min_length}, {column.max_length}], "
                f"avg={column.avg_length:.1f}"
            )

        if column.detected_patterns:
            # Only the first three detected patterns are listed.
            shown = ", ".join(p.pattern for p in column.detected_patterns[:3])
            typer.echo(f" Patterns: {shown}")

        if column.suggested_validators:
            typer.echo(f" Suggested: {len(column.suggested_validators)} validators")
1560
+
1561
+
1562
# =============================================================================
# Data Docs Commands (Phase 8)
# =============================================================================

# Sub-application holding the `truthound docs ...` commands; it is mounted on
# the root CLI under the name "docs".
docs_app = typer.Typer(name="docs", help="Generate data documentation and reports (Phase 8)")
app.add_typer(docs_app, name="docs")
1571
+
1572
+
1573
@docs_app.command(name="generate")
def docs_generate_cmd(
    profile_file: Annotated[
        Path,
        typer.Argument(help="Path to profile JSON file (from auto-profile)"),
    ],
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file path"),
    ] = None,
    title: Annotated[
        str,
        typer.Option("--title", "-t", help="Report title"),
    ] = "Data Profile Report",
    subtitle: Annotated[
        str,
        typer.Option("--subtitle", "-s", help="Report subtitle"),
    ] = "",
    theme: Annotated[
        str,
        typer.Option("--theme", help="Report theme (light, dark, professional, minimal, modern)"),
    ] = "professional",
    chart_library: Annotated[
        str,
        typer.Option("--charts", "-c", help="Chart library (apexcharts, chartjs, plotly, svg)"),
    ] = "apexcharts",
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (html, pdf)"),
    ] = "html",
) -> None:
    """Generate HTML report from profile data.

    This creates a static, self-contained HTML report that can be:
    - Saved as CI/CD artifact
    - Shared via email or Slack
    - Viewed offline in any browser

    Examples:
        # Basic usage
        truthound docs generate profile.json -o report.html

        # With custom title and theme
        truthound docs generate profile.json -o report.html --title "Q4 Data Report" --theme dark

        # Using different chart library
        truthound docs generate profile.json -o report.html --charts chartjs

        # Export to PDF (requires weasyprint)
        truthound docs generate profile.json -o report.pdf --format pdf
    """
    # NOTE: the docstring above is what Typer shows for `--help`; keep it
    # user-facing. Implementation notes live in comments.
    #
    # Exits with status 1 when the profile file is missing, the format is not
    # supported, PDF dependencies are unavailable, or report generation fails.
    if not profile_file.exists():
        typer.echo(f"Error: Profile file not found: {profile_file}", err=True)
        raise typer.Exit(1)

    # Reject unknown formats before touching the filesystem: the format is
    # used to derive the default output suffix below, and failing here avoids
    # loading the profile only to bail out afterwards.
    if format not in ("html", "pdf"):
        typer.echo(f"Error: Unsupported format '{format}'", err=True)
        raise typer.Exit(1)

    # Default output path: same location as the profile, suffix from format.
    if not output:
        output = profile_file.with_suffix(f".{format}")

    try:
        from truthound.datadocs import (
            generate_html_report,
            export_to_pdf,
        )

        # Load profile
        with open(profile_file, "r", encoding="utf-8") as f:
            profile = json.load(f)

        typer.echo(f"Generating {format.upper()} report...")
        typer.echo(f" Profile: {profile_file}")
        typer.echo(f" Theme: {theme}")
        typer.echo(f" Charts: {chart_library}")

        if format == "html":
            html_content = generate_html_report(
                profile=profile,
                title=title,
                subtitle=subtitle,
                theme=theme,
                chart_library=chart_library,
                output_path=output,
            )
            typer.echo(f"\nReport saved to: {output}")
            typer.echo(f" Size: {len(html_content):,} bytes")
        else:
            # format == "pdf" (validated above)
            try:
                output_path = export_to_pdf(
                    profile=profile,
                    output_path=output,
                    title=title,
                    subtitle=subtitle,
                    theme=theme,
                    chart_library="svg",  # SVG works best for PDF
                )
            except ImportError:
                typer.echo(
                    "Error: PDF export requires weasyprint. "
                    "Install with: pip install truthound[pdf]",
                    err=True,
                )
                raise typer.Exit(1)
            typer.echo(f"\nPDF report saved to: {output_path}")

    except typer.Exit:
        # typer.Exit derives from Exception; without this clause the generic
        # handler below would intercept it and print an empty "Error: " line.
        raise
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)
1687
+
1688
+
1689
@docs_app.command(name="themes")
def docs_themes_cmd() -> None:
    """List available report themes."""
    # One line is printed per theme returned by get_available_themes(); themes
    # without an entry in the table below get an empty description. Any
    # failure is reported on stderr and ends the command with status 1.
    try:
        from truthound.datadocs import get_available_themes

        typer.echo("Available report themes:")
        typer.echo("")

        descriptions = {
            "light": "Clean and bright, suitable for most use cases",
            "dark": "Dark mode with vibrant colors, easy on the eyes",
            "professional": "Corporate style, subdued colors (default)",
            "minimal": "Minimalist design with monochrome accents",
            "modern": "Contemporary design with vibrant gradients",
        }

        for theme_name in get_available_themes():
            blurb = descriptions.get(theme_name, "")
            typer.echo(f" {theme_name:14} - {blurb}")

    except Exception as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1)
1713
+
1714
+
1715
@app.command(name="dashboard")
def dashboard_cmd(
    profile: Annotated[
        Optional[Path],
        typer.Option("--profile", "-p", help="Path to profile JSON file"),
    ] = None,
    port: Annotated[
        int,
        typer.Option("--port", help="Server port"),
    ] = 8080,
    host: Annotated[
        str,
        typer.Option("--host", help="Server host"),
    ] = "localhost",
    title: Annotated[
        str,
        typer.Option("--title", "-t", help="Dashboard title"),
    ] = "Truthound Dashboard",
    debug: Annotated[
        bool,
        typer.Option("--debug", help="Enable debug mode"),
    ] = False,
) -> None:
    """Launch interactive dashboard for data exploration.

    This requires the dashboard extra to be installed:
        pip install truthound[dashboard]

    The dashboard provides:
    - Interactive data exploration
    - Column filtering and search
    - Real-time quality metrics
    - Pattern visualization

    Examples:
        # Launch with profile
        truthound dashboard --profile profile.json

        # Custom port and title
        truthound dashboard --profile profile.json --port 3000 --title "My Dashboard"
    """
    # NOTE: the docstring above is rendered by Typer as the `--help` text.
    #
    # Exits with status 1 when the profile path does not exist, the dashboard
    # dependencies cannot be imported, or launching fails.
    try:
        from truthound.datadocs import launch_dashboard

        if profile and not profile.exists():
            typer.echo(f"Error: Profile file not found: {profile}", err=True)
            raise typer.Exit(1)

        typer.echo(f"Launching dashboard on http://{host}:{port}")
        if profile:
            typer.echo(f" Profile: {profile}")

        launch_dashboard(
            profile_path=profile,
            port=port,
            host=host,
            title=title,
            debug=debug,
        )

    except typer.Exit:
        # typer.Exit derives from Exception; re-raise it untouched so the
        # generic handler below does not print a second, empty "Error: " line
        # after the missing-profile message.
        raise
    except ImportError:
        typer.echo(
            "Error: Dashboard requires additional dependencies. "
            "Install with: pip install truthound[dashboard]",
            err=True,
        )
        raise typer.Exit(1)
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)
1785
+
1786
+
1787
# =============================================================================
# Phase 10: ML, Lineage, and Realtime Commands
# =============================================================================

# ML subcommand group: holds the `truthound ml ...` commands and is mounted on
# the root CLI under the name "ml".
ml_app = typer.Typer(name="ml", help="Machine learning based validation commands")
app.add_typer(ml_app, name="ml")
1797
+
1798
+
1799
@ml_app.command(name="anomaly")
def ml_anomaly_cmd(
    file: Annotated[Path, typer.Argument(help="Path to the data file")],
    method: Annotated[
        str,
        typer.Option("--method", "-m", help="Detection method (zscore, iqr, mad, isolation_forest)"),
    ] = "zscore",
    contamination: Annotated[
        float,
        typer.Option("--contamination", "-c", help="Expected proportion of outliers (0.0 to 0.5)"),
    ] = 0.1,
    columns: Annotated[
        Optional[str],
        typer.Option("--columns", help="Comma-separated columns to analyze"),
    ] = None,
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file path for results"),
    ] = None,
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json)"),
    ] = "console",
) -> None:
    """Detect anomalies in data using ML methods.

    Examples:
        truthound ml anomaly data.csv
        truthound ml anomaly data.csv --method isolation_forest --contamination 0.05
        truthound ml anomaly data.csv --columns "amount,price" --output anomalies.json
    """
    # NOTE: the docstring above is rendered by Typer as the `--help` text.
    #
    # Exits with status 1 when the file is missing, the method is unknown, or
    # loading/detection fails.
    import polars as pl
    from truthound.ml import (
        ZScoreAnomalyDetector,
        IQRAnomalyDetector,
        MADAnomalyDetector,
        IsolationForestDetector,
    )
    from truthound.ml.anomaly_models.statistical import StatisticalConfig
    from truthound.ml.anomaly_models.isolation_forest import IsolationForestConfig

    if not file.exists():
        typer.echo(f"Error: File not found: {file}", err=True)
        raise typer.Exit(1)

    # Detectors that share StatisticalConfig; isolation_forest has its own.
    statistical_detectors = {
        "zscore": ZScoreAnomalyDetector,
        "iqr": IQRAnomalyDetector,
        "mad": MADAnomalyDetector,
    }

    # Validate the method before loading any data. Doing it outside the try
    # block also keeps typer.Exit away from the generic handler below, which
    # would otherwise print a second, empty "Error: " line.
    if method != "isolation_forest" and method not in statistical_detectors:
        typer.echo(f"Error: Unknown method '{method}'. Available: zscore, iqr, mad, isolation_forest", err=True)
        raise typer.Exit(1)

    try:
        # Load data: anything not ending in ".csv" is read as Parquet.
        df = pl.read_csv(file) if str(file).endswith(".csv") else pl.read_parquet(file)

        # Parse columns
        cols = [c.strip() for c in columns.split(",")] if columns else None

        # Select detector and appropriate config
        # Use min_samples=10 for CLI to allow smaller datasets
        if method == "isolation_forest":
            config = IsolationForestConfig(
                contamination=contamination, columns=cols, min_samples=10
            )
            detector = IsolationForestDetector(config=config)
        else:
            config = StatisticalConfig(
                contamination=contamination, columns=cols, min_samples=10
            )
            detector = statistical_detectors[method](config=config)

        detector.fit(df.lazy())
        result = detector.predict(df.lazy())

        # Output results
        if format == "json":
            output_data = result.to_dict()
            if output:
                with open(output, "w") as f:
                    json.dump(output_data, f, indent=2)
                typer.echo(f"Results saved to {output}")
            else:
                typer.echo(json.dumps(output_data, indent=2))
        else:
            # NOTE(review): any format other than "json" falls through to the
            # console layout, and --output is not used on this path.
            typer.echo(f"\nAnomaly Detection Results ({method})")
            typer.echo("=" * 50)
            typer.echo(f"Total points: {result.total_points}")
            typer.echo(f"Anomalies found: {result.anomaly_count}")
            typer.echo(f"Anomaly ratio: {result.anomaly_ratio:.2%}")
            typer.echo(f"Threshold used: {result.threshold_used:.4f}")

            if result.anomaly_count > 0:
                typer.echo("\nTop anomalies:")
                # Ten highest-scoring anomalies, highest first.
                anomalies = sorted(result.get_anomalies(), key=lambda x: x.score, reverse=True)[:10]
                for a in anomalies:
                    typer.echo(f" Index {a.index}: score={a.score:.4f}, confidence={a.confidence:.2%}")

    except typer.Exit:
        raise
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)
1900
+
1901
+
1902
@ml_app.command(name="drift")
def ml_drift_cmd(
    baseline: Annotated[Path, typer.Argument(help="Path to baseline/reference data file")],
    current: Annotated[Path, typer.Argument(help="Path to current data file")],
    method: Annotated[
        str,
        typer.Option("--method", "-m", help="Detection method (distribution, feature, multivariate)"),
    ] = "feature",
    threshold: Annotated[
        float,
        typer.Option("--threshold", "-t", help="Drift detection threshold"),
    ] = 0.1,
    columns: Annotated[
        Optional[str],
        typer.Option("--columns", help="Comma-separated columns to analyze"),
    ] = None,
    output: Annotated[
        Optional[Path],
        typer.Option("--output", "-o", help="Output file path"),
    ] = None,
) -> None:
    """Detect data drift between baseline and current datasets.

    Examples:
        truthound ml drift baseline.csv current.csv
        truthound ml drift ref.parquet new.parquet --method multivariate
        truthound ml drift old.csv new.csv --threshold 0.2 --output drift_report.json
    """
    # NOTE: the docstring above is rendered by Typer as the `--help` text.
    #
    # Exits with status 1 when either file is missing, the method is unknown,
    # or loading/detection fails.
    import polars as pl
    from truthound.ml.drift_detection import (
        DistributionDriftDetector,
        FeatureDriftDetector,
        MultivariateDriftDetector,
    )

    if not baseline.exists():
        typer.echo(f"Error: Baseline file not found: {baseline}", err=True)
        raise typer.Exit(1)
    if not current.exists():
        typer.echo(f"Error: Current file not found: {current}", err=True)
        raise typer.Exit(1)

    detector_map = {
        "distribution": DistributionDriftDetector,
        "feature": FeatureDriftDetector,
        "multivariate": MultivariateDriftDetector,
    }

    # Validate the method before loading any data. Doing it outside the try
    # block also keeps typer.Exit away from the generic handler below, which
    # would otherwise print a second, empty "Error: " line.
    if method not in detector_map:
        typer.echo(f"Error: Unknown method '{method}'. Available: {list(detector_map.keys())}", err=True)
        raise typer.Exit(1)

    def _read_frame(path):
        # Anything not ending in ".csv" is read as Parquet.
        return pl.read_csv(path) if str(path).endswith(".csv") else pl.read_parquet(path)

    try:
        # Load data
        baseline_df = _read_frame(baseline)
        current_df = _read_frame(current)

        detector = detector_map[method](threshold=threshold)
        detector.fit(baseline_df.lazy())

        cols = [c.strip() for c in columns.split(",")] if columns else None
        result = detector.detect(baseline_df.lazy(), current_df.lazy(), columns=cols)

        # Output results
        typer.echo(f"\nDrift Detection Results ({method})")
        typer.echo("=" * 50)
        typer.echo(f"Drift detected: {'YES' if result.is_drifted else 'NO'}")
        typer.echo(f"Drift score: {result.drift_score:.4f}")
        typer.echo(f"Drift type: {result.drift_type}")

        if result.column_scores:
            typer.echo("\nPer-column drift scores:")
            # NOTE(review): this iterates column_scores as (column, score)
            # pairs; if it is actually a mapping it would need `.items()`.
            # Confirm against the detector's result type.
            for col, score in sorted(result.column_scores, key=lambda x: x[1], reverse=True):
                status = "[DRIFTED]" if score >= threshold else ""
                typer.echo(f" {col}: {score:.4f} {status}")

        if output:
            with open(output, "w") as f:
                json.dump(result.to_dict(), f, indent=2)
            typer.echo(f"\nResults saved to {output}")

    except typer.Exit:
        raise
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)
1987
+
1988
+
1989
@ml_app.command(name="learn-rules")
def ml_learn_rules_cmd(
    file: Annotated[Path, typer.Argument(help="Path to the data file")],
    output: Annotated[
        Path, typer.Option("--output", "-o", help="Output file for learned rules")
    ] = Path("learned_rules.json"),
    strictness: Annotated[
        str, typer.Option("--strictness", "-s", help="Rule strictness (loose, medium, strict)")
    ] = "medium",
    min_confidence: Annotated[
        float, typer.Option("--min-confidence", help="Minimum rule confidence")
    ] = 0.9,
    max_rules: Annotated[
        int, typer.Option("--max-rules", help="Maximum number of rules to generate")
    ] = 100,
) -> None:
    """Learn validation rules from data.

    Examples:
        truthound ml learn-rules data.csv
        truthound ml learn-rules data.csv --strictness strict --min-confidence 0.95
        truthound ml learn-rules data.parquet --output my_rules.json
    """
    # NOTE: the docstring above is rendered by Typer as the `--help` text.
    #
    # Reads the data file, runs DataProfileRuleLearner over it, prints a
    # per-rule-type tally, and writes the full result to `output` as JSON.
    # A missing file or any failure ends the command with status 1.
    import polars as pl
    from truthound.ml.rule_learning import DataProfileRuleLearner, PatternRuleLearner

    if not file.exists():
        typer.echo(f"Error: File not found: {file}", err=True)
        raise typer.Exit(1)

    try:
        # Anything not ending in ".csv" is read as Parquet.
        if str(file).endswith(".csv"):
            frame = pl.read_csv(file)
        else:
            frame = pl.read_parquet(file)

        typer.echo(f"Learning rules from {file}...")
        typer.echo(f" Rows: {len(frame):,}, Columns: {len(frame.columns)}")

        # Only the profile-based learner is used here.
        profile_learner = DataProfileRuleLearner(
            strictness=strictness,
            min_confidence=min_confidence,
            max_rules=max_rules,
        )
        result = profile_learner.learn_rules(frame.lazy())

        typer.echo(f"\nLearned {len(result.rules)} rules ({result.filtered_rules} filtered)")
        typer.echo(f"Learning time: {result.learning_time_ms:.1f}ms")

        # Count rules per rule type, preserving first-seen order for ties.
        tally = {}
        for learned in result.rules:
            kind = learned.rule_type
            if kind in tally:
                tally[kind] += 1
            else:
                tally[kind] = 1

        typer.echo("\nRules by type:")
        ranked = list(tally.items())
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        for kind, total in ranked:
            typer.echo(f" {kind}: {total}")

        # Persist the full result.
        with open(output, "w") as handle:
            json.dump(result.to_dict(), handle, indent=2)
        typer.echo(f"\nRules saved to {output}")

    except Exception as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1)
2058
+
2059
+
2060
# Lineage subcommand group: holds the `truthound lineage ...` commands and is
# mounted on the root CLI under the name "lineage".
lineage_app = typer.Typer(name="lineage", help="Data lineage tracking and analysis commands")
app.add_typer(lineage_app, name="lineage")
2066
+
2067
+
2068
@lineage_app.command(name="show")
def lineage_show_cmd(
    lineage_file: Annotated[Path, typer.Argument(help="Path to lineage JSON file")],
    node: Annotated[
        Optional[str],
        typer.Option("--node", "-n", help="Show lineage for specific node"),
    ] = None,
    direction: Annotated[
        str,
        typer.Option("--direction", "-d", help="Direction (upstream, downstream, both)"),
    ] = "both",
    format: Annotated[
        str,
        typer.Option("--format", "-f", help="Output format (console, json, dot)"),
    ] = "console",
) -> None:
    """Display lineage information.

    Examples:
        truthound lineage show lineage.json
        truthound lineage show lineage.json --node my_table --direction upstream
        truthound lineage show lineage.json --format dot > lineage.dot
    """
    # NOTE: the docstring above is rendered by Typer as the `--help` text.
    #
    # NOTE(review): `format` is accepted (and the help text advertises json
    # and dot) but nothing below reads it, so output is always the console
    # layout. Wiring it up needs the LineageGraph export API, which is not
    # visible from this function.
    #
    # Exits with status 1 when the file is missing, the requested node is not
    # in the graph, or loading fails.
    from truthound.lineage import LineageGraph

    if not lineage_file.exists():
        typer.echo(f"Error: File not found: {lineage_file}", err=True)
        raise typer.Exit(1)

    try:
        graph = LineageGraph.load(lineage_file)

        if node:
            if not graph.has_node(node):
                typer.echo(f"Error: Node '{node}' not found", err=True)
                raise typer.Exit(1)

            node_obj = graph.get_node(node)
            typer.echo(f"\nLineage for: {node}")
            typer.echo(f"Type: {node_obj.node_type.value}")

            if direction in ("upstream", "both"):
                upstream = graph.get_upstream(node)
                typer.echo(f"\nUpstream ({len(upstream)} nodes):")
                for n in upstream:
                    typer.echo(f" <- {n.name} ({n.node_type.value})")

            if direction in ("downstream", "both"):
                downstream = graph.get_downstream(node)
                typer.echo(f"\nDownstream ({len(downstream)} nodes):")
                for n in downstream:
                    typer.echo(f" -> {n.name} ({n.node_type.value})")
        else:
            # No node requested: print a whole-graph summary instead.
            typer.echo("\nLineage Graph Summary")
            typer.echo("=" * 40)
            typer.echo(f"Nodes: {graph.node_count}")
            typer.echo(f"Edges: {graph.edge_count}")

            # Root and leaf listings are capped at ten entries each; the
            # header still reports the full count.
            roots = graph.get_roots()
            typer.echo(f"\nRoot nodes ({len(roots)}):")
            for root in roots[:10]:
                typer.echo(f" {root.name} ({root.node_type.value})")

            leaves = graph.get_leaves()
            typer.echo(f"\nLeaf nodes ({len(leaves)}):")
            for leaf in leaves[:10]:
                typer.echo(f" {leaf.name} ({leaf.node_type.value})")

    except typer.Exit:
        # typer.Exit derives from Exception; re-raise it untouched so the
        # generic handler below does not print a second, empty "Error: " line
        # after the node-not-found message.
        raise
    except Exception as e:
        typer.echo(f"Error: {e}", err=True)
        raise typer.Exit(1)
2139
+
2140
+
2141
@lineage_app.command(name="impact")
def lineage_impact_cmd(
    lineage_file: Annotated[Path, typer.Argument(help="Path to lineage JSON file")],
    node: Annotated[str, typer.Argument(help="Node to analyze impact for")],
    max_depth: Annotated[
        int, typer.Option("--max-depth", help="Maximum depth for impact analysis")
    ] = -1,
    output: Annotated[
        Optional[Path], typer.Option("--output", "-o", help="Output file for results")
    ] = None,
) -> None:
    """Analyze impact of changes to a data asset.

    Examples:
        truthound lineage impact lineage.json raw_data
        truthound lineage impact lineage.json my_table --max-depth 3
    """
    # NOTE: the docstring above is rendered by Typer as the `--help` text.
    #
    # Loads the lineage graph, runs ImpactAnalyzer for `node`, prints the
    # summary plus up to twenty affected nodes, and optionally dumps the
    # result as JSON. A missing file or any failure ends with status 1.
    from truthound.lineage import LineageGraph, ImpactAnalyzer

    if not lineage_file.exists():
        typer.echo(f"Error: File not found: {lineage_file}", err=True)
        raise typer.Exit(1)

    try:
        impact = ImpactAnalyzer(LineageGraph.load(lineage_file)).analyze_impact(
            node, max_depth=max_depth
        )

        typer.echo(impact.summary())

        if impact.affected_nodes:
            typer.echo("\nAffected nodes:")
            # Text badge per impact level; unknown levels get an empty badge.
            badges = {
                "critical": "[!!!]",
                "high": "[!!]",
                "medium": "[!]",
                "low": "[-]",
                "none": "[ ]",
            }
            for hit in impact.affected_nodes[:20]:
                badge = badges.get(hit.impact_level.value, "")
                typer.echo(f" {badge} {hit.node.name} (depth={hit.distance})")

        if output:
            with open(output, "w") as handle:
                json.dump(impact.to_dict(), handle, indent=2)
            typer.echo(f"\nResults saved to {output}")

    except Exception as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1)
2194
+
2195
+
2196
# Realtime subcommand group: holds the `truthound realtime ...` commands and
# is mounted on the root CLI under the name "realtime".
realtime_app = typer.Typer(name="realtime", help="Real-time and streaming validation commands")
app.add_typer(realtime_app, name="realtime")
2202
+
2203
+
2204
@realtime_app.command(name="validate")
def realtime_validate_cmd(
    source: Annotated[str, typer.Argument(help="Streaming source (mock, kafka:topic, kinesis:stream)")],
    validators: Annotated[
        Optional[str], typer.Option("--validators", "-v", help="Comma-separated validators")
    ] = None,
    batch_size: Annotated[
        int, typer.Option("--batch-size", "-b", help="Batch size")
    ] = 1000,
    max_batches: Annotated[
        int, typer.Option("--max-batches", help="Maximum batches to process (0=unlimited)")
    ] = 10,
    output: Annotated[
        Optional[Path], typer.Option("--output", "-o", help="Output file for results")
    ] = None,
) -> None:
    """Validate streaming data in real-time.

    Examples:
        truthound realtime validate mock --max-batches 5
        truthound realtime validate mock --validators null,range --batch-size 500
    """
    # NOTE: the docstring above is rendered by Typer as the `--help` text.
    #
    # Every source is currently backed by MockStreamingSource; a non-mock
    # source only adds a two-line notice before the mock stream is created.
    # Any failure is reported on stderr and ends the command with status 1.
    from truthound.realtime import MockStreamingSource, StreamingValidator, StreamingConfig

    try:
        # Parse source
        if not source.startswith("mock"):
            typer.echo(f"Source '{source}' requires additional configuration.")
            typer.echo("For now, using mock source for demonstration.")

        # With --max-batches 0 (unlimited) the mock source is still finite.
        stream = MockStreamingSource(
            records_per_batch=batch_size,
            num_batches=max_batches if max_batches > 0 else 100,
        )

        validator_list = None
        if validators:
            validator_list = [name.strip() for name in validators.split(",")]

        streaming_validator = StreamingValidator(
            validators=validator_list,
            config=StreamingConfig(batch_size=batch_size),
        )

        batch_reports = []
        with stream:
            typer.echo("Starting streaming validation...")
            typer.echo(f" Source: {source}")
            typer.echo(f" Batch size: {batch_size}")
            typer.echo(f" Validators: {validator_list or 'all'}")
            typer.echo()

            # A non-positive --max-batches means "no limit" for the validator.
            batch_limit = max_batches if max_batches > 0 else None
            for batch in streaming_validator.validate_stream(stream, max_batches=batch_limit):
                if batch.has_issues:
                    status = "[ISSUES]"
                else:
                    status = "[OK]"
                typer.echo(f"Batch {batch.batch_id}: {batch.record_count} records, {batch.issue_count} issues {status}")
                batch_reports.append(batch.to_dict())

        stats = streaming_validator.get_stats()
        typer.echo("\nSummary")
        typer.echo("=" * 40)
        typer.echo(f"Batches processed: {stats['batch_count']}")
        typer.echo(f"Total records: {stats['total_records']}")
        typer.echo(f"Total issues: {stats['total_issues']}")
        typer.echo(f"Issue rate: {stats['issue_rate']:.2%}")
        typer.echo(f"Avg processing time: {stats['avg_processing_time_ms']:.1f}ms")

        if output:
            with open(output, "w") as handle:
                json.dump({"batches": batch_reports, "stats": stats}, handle, indent=2)
            typer.echo(f"\nResults saved to {output}")

    except Exception as exc:
        typer.echo(f"Error: {exc}", err=True)
        raise typer.Exit(1)
2284
+
2285
+
2286
+ def _discover_and_register_plugins() -> None:
2287
+ """Discover and register CLI plugins from entry points.
2288
+
2289
+ This function discovers plugins registered under the 'truthound.cli'
2290
+ entry point group. This allows external packages (like truthound-dashboard)
2291
+ to extend the CLI with additional commands.
2292
+
2293
+ Entry point format in pyproject.toml:
2294
+ [project.entry-points."truthound.cli"]
2295
+ serve = "truthound_dashboard.cli:register_commands"
2296
+
2297
+ The registered module must have either:
2298
+ - A `register_commands(app: typer.Typer)` function
2299
+ - An `app` attribute that is a Typer instance
2300
+ """
2301
+ import logging
2302
+
2303
+ logger = logging.getLogger(__name__)
2304
+
2305
+ try:
2306
+ from importlib.metadata import entry_points
2307
+
2308
+ # Get entry points for truthound.cli group
2309
+ eps = entry_points(group="truthound.cli")
2310
+
2311
+ for ep in eps:
2312
+ try:
2313
+ # Load the module
2314
+ module = ep.load()
2315
+
2316
+ # Check for register_commands function
2317
+ if hasattr(module, "register_commands"):
2318
+ module.register_commands(app)
2319
+ logger.debug(f"Registered CLI plugin via register_commands: {ep.name}")
2320
+
2321
+ # Check for app attribute (sub-typer)
2322
+ elif hasattr(module, "app"):
2323
+ app.add_typer(module.app, name=ep.name)
2324
+ logger.debug(f"Registered CLI plugin via app typer: {ep.name}")
2325
+
2326
+ # Check if the module itself is a callable (register function)
2327
+ elif callable(module):
2328
+ module(app)
2329
+ logger.debug(f"Registered CLI plugin via callable: {ep.name}")
2330
+
2331
+ else:
2332
+ logger.warning(
2333
+ f"CLI plugin '{ep.name}' has no register_commands, app, "
2334
+ "or is not callable"
2335
+ )
2336
+
2337
+ except Exception as e:
2338
+ # Log but don't fail - plugins shouldn't break core functionality
2339
+ logger.debug(f"Failed to load CLI plugin '{ep.name}': {e}")
2340
+
2341
+ except Exception as e:
2342
+ # Entry points not available or other error - silently continue
2343
+ logger.debug(f"Entry point discovery not available: {e}")
2344
+
2345
+
2346
# Plugin discovery happens as an import-time side effect so that commands
# contributed through entry points are already registered on `app` by the
# time the CLI is invoked.
_discover_and_register_plugins()
2350
+
2351
+
2352
def main() -> None:
    """Run the Truthound command-line application."""
    # Delegates to the root Typer app defined in this module.
    app()
2355
+
2356
+
2357
# Allow running this module directly as a script.
if __name__ == "__main__":
    main()