truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877)
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,857 @@
1
+ """Validation suite generator.
2
+
3
+ This module provides the main ValidationSuiteGenerator that combines
4
+ all rule generators to create a complete validation suite from a profile.
5
+
6
+ Key Features:
7
+ - Generates validation rules from profile results
8
+ - Supports multiple profile types (TableProfile, ProfileReport, dict)
9
+ - Configurable strictness levels
10
+ - Filtering by category and confidence
11
+ - Export to YAML, JSON, and Python code
12
+
13
+ Example:
14
+ from truthound.profiler import profile_file, generate_suite
15
+
16
+ # Using TableProfile (recommended)
17
+ profile = profile_file("data.parquet")
18
+ suite = generate_suite(profile)
19
+
20
+ # Using ProfileReport from th.profile()
21
+ import truthound as th
22
+ profile_report = th.profile("data.csv")
23
+ suite = generate_suite(profile_report)
24
+
25
+ # Using dict
26
+ profile_dict = profile.to_dict()
27
+ suite = generate_suite(profile_dict)
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import json
33
+ import logging
34
+ from dataclasses import dataclass, field
35
+ from datetime import datetime
36
+ from pathlib import Path
37
+ from typing import Any, Callable, Sequence, TYPE_CHECKING, Union
38
+
39
+ from truthound.profiler.base import (
40
+ ColumnProfile,
41
+ DataType,
42
+ ProfilerConfig,
43
+ Strictness,
44
+ TableProfile,
45
+ )
46
+ from truthound.profiler.generators.base import (
47
+ GeneratedRule,
48
+ RuleCategory,
49
+ RuleConfidence,
50
+ RuleGenerator,
51
+ rule_generator_registry,
52
+ )
53
+
54
+ if TYPE_CHECKING:
55
+ from truthound.validators.base import Validator
56
+ from truthound.profiler.integration.protocols import ExecutionContext, ExecutionResult
57
+ from truthound.report import ProfileReport
58
+
59
+ logger = logging.getLogger(__name__)
60
+
61
+ # Type alias for supported profile types
62
+ ProfileInput = Union["TableProfile", "ProfileReport", dict[str, Any]]
63
+
64
+
65
@dataclass(frozen=True)
class ValidationSuite:
    """A complete validation suite generated from a profile.

    This is an immutable collection of generated rules that can be
    exported, filtered, and converted to actual validators. Every filter
    method returns a new suite; the receiver is never modified.
    """

    name: str
    rules: tuple[GeneratedRule, ...] = field(default_factory=tuple)
    source_profile: str = ""  # Reference to source profile
    strictness: Strictness = Strictness.MEDIUM
    metadata: dict[str, Any] = field(default_factory=dict)

    def __len__(self) -> int:
        """Return the number of rules in the suite."""
        return len(self.rules)

    def __iter__(self):
        """Iterate over the rules in suite order."""
        return iter(self.rules)

    def _with_rules(self, rules: Any) -> "ValidationSuite":
        """Return a copy of this suite that holds ``rules`` instead.

        Name, source profile, strictness and the (shared, not copied)
        metadata dict are carried over unchanged.
        """
        return ValidationSuite(
            name=self.name,
            rules=tuple(rules),
            source_profile=self.source_profile,
            strictness=self.strictness,
            metadata=self.metadata,
        )

    def filter_by_category(
        self,
        *categories: RuleCategory,
    ) -> "ValidationSuite":
        """Keep only the rules whose category is one of ``categories``."""
        return self._with_rules(
            r for r in self.rules if r.category in categories
        )

    def filter_by_confidence(
        self,
        min_confidence: RuleConfidence,
    ) -> "ValidationSuite":
        """Keep only the rules with at least ``min_confidence``.

        Confidence levels are ordered LOW < MEDIUM < HIGH.
        """
        confidence_order = {
            RuleConfidence.LOW: 0,
            RuleConfidence.MEDIUM: 1,
            RuleConfidence.HIGH: 2,
        }
        min_level = confidence_order[min_confidence]
        return self._with_rules(
            r for r in self.rules
            if confidence_order[r.confidence] >= min_level
        )

    def filter_by_columns(self, *columns: str) -> "ValidationSuite":
        """Keep rules that touch at least one of ``columns``.

        Rules that declare no columns at all are always kept.
        """
        column_set = set(columns)
        return self._with_rules(
            r for r in self.rules
            if not r.columns or any(c in column_set for c in r.columns)
        )

    def exclude_categories(
        self,
        *categories: RuleCategory,
    ) -> "ValidationSuite":
        """Drop every rule whose category is one of ``categories``."""
        excluded = set(categories)
        return self._with_rules(
            r for r in self.rules if r.category not in excluded
        )

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for serialization.

        Besides the suite fields, the result carries a ``summary`` with the
        total rule count and per-category / per-confidence counts.
        """
        return {
            "name": self.name,
            "rules": [r.to_dict() for r in self.rules],
            "source_profile": self.source_profile,
            "strictness": self.strictness.value,
            "metadata": self.metadata,
            "summary": {
                "total_rules": len(self.rules),
                "by_category": self._count_by_category(),
                "by_confidence": self._count_by_confidence(),
            },
        }

    def _count_by_category(self) -> dict[str, int]:
        """Count rules per category value."""
        counts: dict[str, int] = {}
        for rule in self.rules:
            cat = rule.category.value
            counts[cat] = counts.get(cat, 0) + 1
        return counts

    def _count_by_confidence(self) -> dict[str, int]:
        """Count rules per confidence value."""
        counts: dict[str, int] = {}
        for rule in self.rules:
            conf = rule.confidence.value
            counts[conf] = counts.get(conf, 0) + 1
        return counts

    @staticmethod
    def _yaml_value(value: Any) -> str:
        """Render ``value`` as a YAML flow scalar/collection.

        JSON output is valid YAML, so ``json.dumps`` gives correct quoting
        and escaping for strings, ``null`` for None and ``true``/``false``
        for booleans. Values JSON cannot represent are rendered via ``str``
        (sets become lists first).
        """

        def _fallback(obj: Any) -> Any:
            if isinstance(obj, (set, frozenset)):
                return sorted(obj, key=repr)
            return str(obj)

        try:
            return json.dumps(value, ensure_ascii=False, default=_fallback)
        except (TypeError, ValueError):
            # E.g. dicts with non-scalar keys: fall back to a quoted string.
            return json.dumps(str(value), ensure_ascii=False)

    def to_yaml(self) -> str:
        """Convert to YAML format for human-readable output.

        Columns, parameter values and descriptions are emitted as quoted /
        escaped flow values so that special characters (quotes, colons,
        newlines) cannot corrupt the document.
        """
        lines = [
            f"# Validation Suite: {self.name}",
            f"# Strictness: {self.strictness.value}",
            f"# Total rules: {len(self.rules)}",
            "",
            "rules:",
        ]

        for rule in self.rules:
            lines.append(f"  - name: {rule.name}")
            lines.append(f"    validator: {rule.validator_class}")
            lines.append(f"    category: {rule.category.value}")
            lines.append(f"    confidence: {rule.confidence.value}")

            if rule.columns:
                columns = self._yaml_value(list(rule.columns))
                lines.append(f"    columns: {columns}")

            if rule.parameters:
                lines.append("    parameters:")
                for k, v in rule.parameters.items():
                    lines.append(f"      {k}: {self._yaml_value(v)}")

            if rule.mostly is not None:
                lines.append(f"    mostly: {rule.mostly}")

            if rule.description:
                description = self._yaml_value(str(rule.description))
                lines.append(f"    description: {description}")

            lines.append("")

        return "\n".join(lines)

    def execute(
        self,
        data: Any,
        *,
        parallel: bool = False,
        fail_fast: bool = False,
        max_workers: int | None = None,
        timeout_seconds: float | None = None,
        context: "ExecutionContext | None" = None,
    ) -> "ExecutionResult":
        """Execute the validation suite against data.

        This method provides a convenient way to run all validators
        in the suite against the provided data.

        Args:
            data: Data to validate (LazyFrame, DataFrame, or file path).
            parallel: Whether to run validators in parallel.
            fail_fast: Whether to stop on first failure.
            max_workers: Maximum number of parallel workers.
            timeout_seconds: Maximum execution time per validator.
            context: Pre-configured execution context (overrides other params).

        Returns:
            ExecutionResult with validation report and metrics.

        Example:
            suite = generate_suite(profile)
            result = suite.execute(data, parallel=True)

            if result.success:
                print(f"All {result.passed_rules} rules passed!")
            else:
                print(f"Failed: {result.failed_rules} rules")
        """
        # Imported lazily, as in the rest of this module's execution helpers.
        from truthound.profiler.integration.executor import SuiteExecutor
        from truthound.profiler.integration.protocols import ExecutionContext as ExecCtx

        # Build a context from the keyword arguments only when none is given;
        # an explicit context wins over the individual parameters.
        if context is None:
            context = ExecCtx(
                parallel=parallel,
                fail_fast=fail_fast,
                max_workers=max_workers,
                timeout_seconds=timeout_seconds,
            )

        executor = SuiteExecutor(
            parallel=context.parallel,
            fail_fast=context.fail_fast,
            max_workers=context.max_workers,
            timeout_seconds=context.timeout_seconds,
        )

        return executor.execute(self, data, context)

    async def execute_async(
        self,
        data: Any,
        *,
        parallel: bool = True,
        fail_fast: bool = False,
        context: "ExecutionContext | None" = None,
    ) -> "ExecutionResult":
        """Execute the validation suite asynchronously.

        Args:
            data: Data to validate.
            parallel: Whether to run validators in parallel.
            fail_fast: Whether to stop on first failure.
            context: Pre-configured execution context (overrides other params).

        Returns:
            ExecutionResult with validation report and metrics.
        """
        from truthound.profiler.integration.executor import AsyncSuiteExecutor
        from truthound.profiler.integration.protocols import ExecutionContext as ExecCtx

        if context is None:
            context = ExecCtx(parallel=parallel, fail_fast=fail_fast)

        executor = AsyncSuiteExecutor(
            parallel=context.parallel,
            fail_fast=context.fail_fast,
        )

        return await executor.execute_async(self, data, context)

    def to_python_code(self) -> str:
        """Generate Python code to create validators.

        The result is the source of a module exposing ``create_validators()``
        which instantiates one validator per rule. Parameter values are
        written with ``repr`` so strings containing quotes or backslashes
        still yield valid code, and the import statement is omitted for an
        empty suite (an empty parenthesised import is a syntax error).
        """
        lines = [
            '"""Auto-generated validation suite."""',
            "",
        ]

        # Import each validator class once, in a stable order.
        validator_names = sorted({r.validator_class for r in self.rules})
        if validator_names:
            lines.append("from truthound.validators import (")
            lines.extend(f"    {v}," for v in validator_names)
            lines.append(")")
            lines.append("")
        lines.append("")
        lines.append("def create_validators():")
        lines.append('    """Create validation rules."""')
        lines.append("    validators = []")
        lines.append("")

        for rule in self.rules:
            lines.append(f"    # {rule.name}")
            if rule.description:
                # Comment every line so a multi-line description cannot leak
                # into the generated code.
                for text in str(rule.description).splitlines():
                    lines.append(f"    # {text}")

            # Build the keyword arguments for the validator constructor.
            params = []
            if rule.columns:
                params.append(f"columns={list(rule.columns)}")
            for k, v in rule.parameters.items():
                params.append(f"{k}={v!r}")
            if rule.mostly is not None:
                params.append(f"mostly={rule.mostly}")

            param_str = ", ".join(params)
            lines.append(
                f"    validators.append({rule.validator_class}({param_str}))"
            )
            lines.append("")

        lines.append("    return validators")
        lines.append("")

        return "\n".join(lines)
350
+
351
+
352
class ValidationSuiteGenerator:
    """Generates validation suites by combining multiple rule generators.

    This is the main entry point for automatic rule generation. It
    orchestrates multiple generators and combines their output into
    a cohesive validation suite.

    Example:
        generator = ValidationSuiteGenerator()
        suite = generator.generate_from_profile(
            profile,
            strictness=Strictness.MEDIUM,
            include_categories=["schema", "completeness", "format"]
        )

        # Export
        suite.to_yaml()
        suite.to_python_code()
    """

    def __init__(
        self,
        generators: Sequence[RuleGenerator] | None = None,
        **kwargs: Any,
    ):
        """Initialize suite generator.

        Args:
            generators: Custom list of generators to use.
                If None, uses all registered generators.
            **kwargs: Additional arguments passed to the registry when it
                creates the default generators. Ignored when ``generators``
                is given.
        """
        if generators is not None:
            self.generators = list(generators)
        else:
            # Use all registered generators
            self.generators = rule_generator_registry.create_all(**kwargs)

    def add_generator(self, generator: RuleGenerator) -> None:
        """Add a custom generator and re-sort by descending priority."""
        self.generators.append(generator)
        self.generators.sort(key=lambda g: -g.priority)

    def generate_from_profile(
        self,
        profile: TableProfile,
        *,
        strictness: Strictness = Strictness.MEDIUM,
        include_categories: Sequence[str] | None = None,
        exclude_categories: Sequence[str] | None = None,
        min_confidence: RuleConfidence | None = None,
        name: str | None = None,
    ) -> ValidationSuite:
        """Generate a validation suite from a profile.

        A generator that raises is skipped (best effort) and the failure is
        logged with its traceback, so the remaining generators still
        contribute their rules.

        Args:
            profile: Table profile to generate rules from
            strictness: How strict the generated rules should be
            include_categories: Only include rules from these categories
            exclude_categories: Exclude rules from these categories
            min_confidence: Only include rules with at least this confidence
            name: Name for the suite (defaults to profile name)

        Returns:
            Generated validation suite

        Raises:
            ValueError: If a category string is not a valid RuleCategory.
        """
        all_rules: list[GeneratedRule] = []

        # Convert category strings to enums
        include_cats = None
        if include_categories:
            include_cats = {RuleCategory(c) for c in include_categories}

        exclude_cats = set()
        if exclude_categories:
            exclude_cats = {RuleCategory(c) for c in exclude_categories}

        for generator in self.generators:
            # Skip if generator doesn't produce any included categories
            if include_cats and not generator.categories & include_cats:
                continue

            # Skip if all generator categories are excluded
            if exclude_cats and generator.categories <= exclude_cats:
                continue

            try:
                rules = generator.generate(profile, strictness)
            except Exception:
                # Best effort: one broken generator must not abort the whole
                # suite, but the failure has to be visible to the operator.
                logger.warning(
                    "Rule generator %r failed; skipping its rules",
                    getattr(generator, "name", generator),
                    exc_info=True,
                )
                continue

            # Filter by category
            if include_cats:
                rules = [r for r in rules if r.category in include_cats]
            if exclude_cats:
                rules = [r for r in rules if r.category not in exclude_cats]

            all_rules.extend(rules)

        # Filter by confidence (LOW < MEDIUM < HIGH)
        if min_confidence is not None:
            confidence_order = {
                RuleConfidence.LOW: 0,
                RuleConfidence.MEDIUM: 1,
                RuleConfidence.HIGH: 2,
            }
            min_level = confidence_order[min_confidence]
            all_rules = [
                r for r in all_rules
                if confidence_order[r.confidence] >= min_level
            ]

        # Deduplicate rules (same name = same rule); the first one wins.
        seen_names: set[str] = set()
        unique_rules: list[GeneratedRule] = []
        for rule in all_rules:
            if rule.name not in seen_names:
                seen_names.add(rule.name)
                unique_rules.append(rule)

        return ValidationSuite(
            name=name or profile.name or "generated_suite",
            rules=tuple(unique_rules),
            # Keep the declared ``str`` type even for an unnamed profile.
            source_profile=profile.name or "",
            strictness=strictness,
            metadata={
                "profile_row_count": profile.row_count,
                "profile_column_count": profile.column_count,
                "generators_used": [g.name for g in self.generators],
            },
        )
488
+
489
+
490
+ # =============================================================================
491
+ # Profile Adapter
492
+ # =============================================================================
493
+
494
+
495
class ProfileAdapter:
    """Adapter for converting various profile types to TableProfile.

    This enables generate_suite() to work with different profile types:
    - TableProfile: Native profiler output (used directly)
    - ProfileReport: Simplified report from th.profile() API
    - dict: Dictionary representation of a profile

    Example:
        # From ProfileReport (th.profile() output)
        import truthound as th
        profile_report = th.profile("data.csv")
        table_profile = ProfileAdapter.to_table_profile(profile_report)

        # From dict
        profile_dict = {"row_count": 100, "columns": [...]}
        table_profile = ProfileAdapter.to_table_profile(profile_dict)
    """

    @staticmethod
    def to_table_profile(profile: ProfileInput) -> TableProfile:
        """Convert any profile type to TableProfile.

        Args:
            profile: Profile in any supported format.

        Returns:
            TableProfile instance. A TableProfile input is returned as-is
            (no copy is made).

        Raises:
            TypeError: If the profile type is not supported.
        """
        # Already a TableProfile
        if isinstance(profile, TableProfile):
            return profile

        # ProfileReport is detected by duck typing (source / columns /
        # row_count attributes) rather than by importing its class.
        if (
            hasattr(profile, 'source')
            and hasattr(profile, 'columns')
            and hasattr(profile, 'row_count')
        ):
            return ProfileAdapter._from_profile_report(profile)

        # Dictionary format
        if isinstance(profile, dict):
            return ProfileAdapter._from_dict(profile)

        raise TypeError(
            f"Unsupported profile type: {type(profile).__name__}. "
            "Expected TableProfile, ProfileReport, or dict."
        )

    @staticmethod
    def _from_profile_report(report: Any) -> TableProfile:
        """Convert ProfileReport to TableProfile.

        Counts are not carried by the report, so ``null_count`` and
        ``distinct_count`` are estimated from the percentage strings and the
        table row count.

        Args:
            report: ProfileReport-like object; ``columns`` entries are read
                with ``.get()`` using the keys ``name``, ``dtype``,
                ``null_pct`` and ``unique_pct``.

        Returns:
            TableProfile with extracted information. ``profiled_at`` is the
            time of conversion, not the time of the original profiling run.
        """
        # The row count is table-wide, so read it once instead of per column.
        row_count = getattr(report, 'row_count', 0)
        column_profiles: list[ColumnProfile] = []

        for col_dict in getattr(report, 'columns', []):
            null_ratio = ProfileAdapter._parse_percentage(
                col_dict.get('null_pct', '0%')
            )
            unique_ratio = ProfileAdapter._parse_percentage(
                col_dict.get('unique_pct', '0%')
            )

            # Infer data type from dtype string
            dtype_str = col_dict.get('dtype', 'unknown')
            inferred_type = ProfileAdapter._infer_data_type(dtype_str)

            null_count = ProfileAdapter._estimate_count(null_ratio, row_count)
            distinct_count = ProfileAdapter._estimate_count(unique_ratio, row_count)

            column_profiles.append(
                ColumnProfile(
                    name=col_dict.get('name', ''),
                    physical_type=dtype_str,
                    inferred_type=inferred_type,
                    row_count=row_count,
                    null_count=null_count,
                    null_ratio=null_ratio,
                    distinct_count=distinct_count,
                    unique_ratio=unique_ratio,
                    is_unique=unique_ratio >= 0.99,
                    is_constant=distinct_count <= 1,
                )
            )

        return TableProfile(
            name=getattr(report, 'source', 'unknown'),
            row_count=getattr(report, 'row_count', 0),
            column_count=getattr(report, 'column_count', len(column_profiles)),
            estimated_memory_bytes=getattr(report, 'size_bytes', 0),
            columns=tuple(column_profiles),
            source=getattr(report, 'source', 'unknown'),
            profiled_at=datetime.now(),
        )

    @staticmethod
    def _from_dict(data: dict[str, Any]) -> TableProfile:
        """Convert dict to TableProfile.

        Both dict layouts are accepted per column: the TableProfile layout
        (``inferred_type``, ``physical_type``, ``null_ratio``,
        ``unique_ratio``) and the ProfileReport layout (``dtype``,
        ``null_pct``, ``unique_pct``).

        Args:
            data: Dictionary with profile data.

        Returns:
            TableProfile instance. ``profiled_at`` is the time of conversion.
        """
        column_profiles: list[ColumnProfile] = []

        for col_dict in data.get('columns', []):
            if 'inferred_type' in col_dict:
                # TableProfile format: the value is already a DataType value.
                inferred_type = DataType(col_dict['inferred_type'])
            else:
                # ProfileReport format: derive the type from the dtype string.
                dtype_str = col_dict.get('dtype', col_dict.get('physical_type', 'unknown'))
                inferred_type = ProfileAdapter._infer_data_type(dtype_str)

            # Fall back to the ProfileReport keys so percentage strings are
            # not silently dropped when the *_ratio keys are absent.
            null_ratio = ProfileAdapter._ratio_from(col_dict, 'null_ratio', 'null_pct')
            unique_ratio = ProfileAdapter._ratio_from(col_dict, 'unique_ratio', 'unique_pct')

            row_count = col_dict.get('row_count', data.get('row_count', 0))

            column_profiles.append(
                ColumnProfile(
                    name=col_dict.get('name', ''),
                    physical_type=col_dict.get('physical_type', col_dict.get('dtype', 'unknown')),
                    inferred_type=inferred_type,
                    row_count=row_count,
                    null_count=col_dict.get(
                        'null_count',
                        ProfileAdapter._estimate_count(null_ratio, row_count),
                    ),
                    null_ratio=null_ratio,
                    distinct_count=col_dict.get(
                        'distinct_count',
                        ProfileAdapter._estimate_count(unique_ratio, row_count),
                    ),
                    unique_ratio=unique_ratio,
                    is_unique=col_dict.get('is_unique', unique_ratio >= 0.99),
                    is_constant=col_dict.get('is_constant', False),
                )
            )

        return TableProfile(
            name=data.get('name', data.get('source', 'unknown')),
            row_count=data.get('row_count', 0),
            column_count=data.get('column_count', len(column_profiles)),
            estimated_memory_bytes=data.get('estimated_memory_bytes', data.get('size_bytes', 0)),
            columns=tuple(column_profiles),
            duplicate_row_count=data.get('duplicate_row_count', 0),
            duplicate_row_ratio=data.get('duplicate_row_ratio', 0.0),
            source=data.get('source', data.get('name', '')),
            profiled_at=datetime.now(),
        )

    @staticmethod
    def _ratio_from(col_dict: dict[str, Any], ratio_key: str, pct_key: str) -> float:
        """Read a ratio from a column dict, accepting either key style.

        Args:
            col_dict: Column dictionary.
            ratio_key: Preferred key holding a ratio (e.g. ``null_ratio``).
            pct_key: Fallback key holding a percentage (e.g. ``null_pct``).

        Returns:
            The ratio, or 0.0 when neither key is present.
        """
        if ratio_key in col_dict:
            value = col_dict[ratio_key]
            # Numeric ratios are used unchanged; only strings need parsing.
            if isinstance(value, str):
                return ProfileAdapter._parse_percentage(value)
            return value
        if pct_key in col_dict:
            return ProfileAdapter._parse_percentage(col_dict[pct_key])
        return 0.0

    @staticmethod
    def _estimate_count(ratio: float, row_count: int) -> int:
        """Estimate an absolute count from a ratio and a row count.

        ``round`` is used instead of ``int`` because float error otherwise
        truncates exact results (``int(0.29 * 100)`` is 28, not 29).

        Args:
            ratio: Fraction of rows.
            row_count: Total number of rows.

        Returns:
            Rounded count, or 0 when ``row_count`` is not positive.
        """
        if row_count > 0:
            return round(ratio * row_count)
        return 0

    @staticmethod
    def _parse_percentage(pct_str: str) -> float:
        """Parse percentage string to float ratio.

        Values with a trailing ``%`` are always divided by 100. Bare numbers
        are ambiguous, so a heuristic is used: anything greater than 1 is
        treated as a percentage, anything else as an already-computed ratio.
        The result is not clamped.

        Args:
            pct_str: String like "10.5%" or "10.5"; numbers are accepted too.

        Returns:
            Float ratio (normally between 0.0 and 1.0); 0.0 when the input
            cannot be parsed.
        """
        if isinstance(pct_str, (int, float)):
            # Coerce so that int input (e.g. 0 or 1) still yields a float.
            value = float(pct_str)
            return value / 100.0 if value > 1 else value

        text = str(pct_str).strip()
        if text.endswith('%'):
            try:
                return float(text[:-1]) / 100.0
            except ValueError:
                return 0.0

        try:
            value = float(text)
        except ValueError:
            return 0.0
        return value / 100.0 if value > 1 else value

    @staticmethod
    def _infer_data_type(dtype_str: str) -> DataType:
        """Infer DataType from Polars dtype string.

        Matching is by case-insensitive substring, and the first matching
        family wins, so the order of the checks below matters.

        Args:
            dtype_str: Polars dtype string like "Int64", "String", etc.
                Non-string values are converted with ``str()`` first.

        Returns:
            Inferred DataType enum value (``DataType.UNKNOWN`` if no family
            matches).
        """
        dtype_lower = str(dtype_str).lower()

        # Integer types
        if any(t in dtype_lower for t in ['int', 'i8', 'i16', 'i32', 'i64', 'u8', 'u16', 'u32', 'u64']):
            return DataType.INTEGER

        # Float types
        if any(t in dtype_lower for t in ['float', 'f32', 'f64', 'decimal']):
            return DataType.FLOAT

        # Boolean
        if 'bool' in dtype_lower:
            return DataType.BOOLEAN

        # Temporal types. 'duration' must be tested before 'time' because a
        # repr such as "Duration(time_unit='us')" contains "time"; likewise
        # 'datetime' must precede both 'date' and 'time'.
        if 'duration' in dtype_lower:
            return DataType.DURATION
        if 'datetime' in dtype_lower:
            return DataType.DATETIME
        if 'date' in dtype_lower:
            return DataType.DATE
        if 'time' in dtype_lower:
            return DataType.TIME

        # String types
        if any(t in dtype_lower for t in ['str', 'string', 'utf8', 'categorical']):
            return DataType.STRING

        return DataType.UNKNOWN
723
+
724
+
725
+ # =============================================================================
726
+ # Convenience Functions
727
+ # =============================================================================
728
+
729
+
730
def generate_suite(
    profile: ProfileInput,
    *,
    strictness: str | Strictness = "medium",
    include_categories: Sequence[str] | None = None,
    exclude_categories: Sequence[str] | None = None,
    min_confidence: str | RuleConfidence | None = None,
    name: str | None = None,
) -> ValidationSuite:
    """Build a validation suite from profiling output.

    The profile is first normalised through ``ProfileAdapter``, so any of
    these inputs is accepted:
    - TableProfile: Native profiler output (from profile_file(), profile_dataframe())
    - ProfileReport: Simplified report from th.profile() API
    - dict: Dictionary representation of a profile

    Args:
        profile: Profile in any supported format (TableProfile, ProfileReport, or dict)
        strictness: "loose", "medium", or "strict" (or a Strictness member)
        include_categories: Only include rules from these categories
        exclude_categories: Exclude rules from these categories
        min_confidence: "low", "medium", or "high" (or a RuleConfidence
            member); None disables confidence filtering
        name: Name for the suite

    Returns:
        Generated validation suite

    Example:
        # Using profile_file() - recommended for full features
        from truthound.profiler import profile_file, generate_suite
        profile = profile_file("data.parquet")
        suite = generate_suite(profile, strictness="medium")

        # Using th.profile() - simpler API
        import truthound as th
        profile_report = th.profile("data.csv")
        suite = generate_suite(profile_report)

        # View as YAML
        print(suite.to_yaml())

        # Generate Python code
        print(suite.to_python_code())
    """
    # Normalise the input first so an unsupported profile fails before any
    # option parsing happens.
    native_profile = ProfileAdapter.to_table_profile(profile)

    # String options are promoted to their enum counterparts.
    strictness_level = (
        Strictness(strictness) if isinstance(strictness, str) else strictness
    )
    confidence_floor = (
        RuleConfidence(min_confidence)
        if isinstance(min_confidence, str)
        else min_confidence
    )

    return ValidationSuiteGenerator().generate_from_profile(
        native_profile,
        strictness=strictness_level,
        include_categories=include_categories,
        exclude_categories=exclude_categories,
        min_confidence=confidence_floor,
        name=name,
    )
793
+
794
+
795
def save_suite(
    suite: ValidationSuite,
    path: str | Path,
    format: str = "json",
) -> None:
    """Save a validation suite to a file.

    The suite is serialised completely before the target file is opened, so
    a serialisation failure neither truncates an existing file nor leaves a
    partially written one behind.

    Args:
        suite: Suite to save
        path: Output file path
        format: "json", "yaml", or "python"

    Raises:
        ValueError: If ``format`` is not one of the supported values.
    """
    if format == "json":
        content = json.dumps(suite.to_dict(), indent=2, ensure_ascii=False)
    elif format == "yaml":
        content = suite.to_yaml()
    elif format == "python":
        content = suite.to_python_code()
    else:
        raise ValueError(f"Unknown format: {format}. Use 'json', 'yaml', or 'python'")

    # Only touch the filesystem once the content is known to be complete.
    Path(path).write_text(content, encoding="utf-8")
820
+
821
+
822
def load_suite(path: str | Path) -> ValidationSuite:
    """Read a validation suite back from its JSON representation.

    Each rule entry must provide ``name``, ``validator_class`` and
    ``category``; the remaining rule fields and all suite-level fields fall
    back to defaults when absent.

    Args:
        path: Path to the suite JSON file

    Returns:
        Loaded validation suite
    """
    with Path(path).open("r", encoding="utf-8") as handle:
        payload = json.load(handle)

    loaded_rules: list[GeneratedRule] = []
    for entry in payload.get("rules", []):
        loaded_rules.append(
            GeneratedRule(
                name=entry["name"],
                validator_class=entry["validator_class"],
                category=RuleCategory(entry["category"]),
                parameters=entry.get("parameters", {}),
                columns=tuple(entry.get("columns", [])),
                confidence=RuleConfidence(entry.get("confidence", "medium")),
                description=entry.get("description", ""),
                rationale=entry.get("rationale", ""),
                mostly=entry.get("mostly"),
            )
        )

    return ValidationSuite(
        name=payload.get("name", ""),
        rules=tuple(loaded_rules),
        source_profile=payload.get("source_profile", ""),
        strictness=Strictness(payload.get("strictness", "medium")),
        metadata=payload.get("metadata", {}),
    )