truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877) hide show
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,1345 @@
1
+ """Rule quality scoring with precision/recall estimation.
2
+
3
+ This module provides comprehensive quality metrics for generated rules:
4
+ - Precision and recall estimation
5
+ - F1 score calculation
6
+ - Confidence scoring
7
+ - Rule validation against sample data
8
+ - Quality trend analysis
9
+
10
+ Key features:
11
+ - Pluggable quality estimator architecture
12
+ - Statistical sampling for large datasets
13
+ - Historical quality tracking
14
+ - Feedback loop integration
15
+
16
+ Example:
17
+ from truthound.profiler.quality import (
18
+ RuleQualityScorer,
19
+ QualityMetrics,
20
+ estimate_quality,
21
+ )
22
+
23
+ # Score a rule
24
+ scorer = RuleQualityScorer()
25
+ metrics = scorer.score(rule, data)
26
+
27
+ print(f"Precision: {metrics.precision:.2%}")
28
+ print(f"Recall: {metrics.recall:.2%}")
29
+ print(f"F1 Score: {metrics.f1_score:.2%}")
30
+ """
31
+
32
+ from __future__ import annotations
33
+
34
+ import hashlib
35
+ import json
36
+ import math
37
+ import random
38
+ import re
39
+ import threading
40
+ from abc import ABC, abstractmethod
41
+ from collections import defaultdict
42
+ from dataclasses import dataclass, field
43
+ from datetime import datetime, timedelta
44
+ from enum import Enum
45
+ from pathlib import Path
46
+ from typing import Any, Callable, Generic, Protocol, TypeVar
47
+
48
+ import polars as pl
49
+
50
+ from truthound.profiler.base import ColumnProfile, TableProfile, DataType
51
+
52
+
53
+ # =============================================================================
54
+ # Types and Enums
55
+ # =============================================================================
56
+
57
+
58
class QualityLevel(str, Enum):
    """Coarse quality bucket derived from an F1 score.

    Each member's value is its lowercase name; the F1 range it covers is
    noted beside it and enforced by :meth:`from_f1`.
    """

    EXCELLENT = "excellent"  # F1 >= 0.95
    GOOD = "good"  # F1 >= 0.85
    ACCEPTABLE = "acceptable"  # F1 >= 0.70
    POOR = "poor"  # F1 >= 0.50
    UNACCEPTABLE = "unacceptable"  # F1 < 0.50

    @classmethod
    def from_f1(cls, f1_score: float) -> "QualityLevel":
        """Map an F1 score onto the matching quality bucket.

        Args:
            f1_score: F1 score to classify.

        Returns:
            The first level whose lower bound is not above ``f1_score``;
            ``UNACCEPTABLE`` when no bound is met.
        """
        # Ordered from the strictest bound down; the first hit wins.
        lower_bounds = (
            (0.95, cls.EXCELLENT),
            (0.85, cls.GOOD),
            (0.70, cls.ACCEPTABLE),
            (0.50, cls.POOR),
        )
        for bound, level in lower_bounds:
            if f1_score >= bound:
                return level
        return cls.UNACCEPTABLE
80
+
81
+
82
class RuleType(str, Enum):
    """Category tag attached to a validation rule.

    Members are also ``str`` instances, so each one compares equal to its
    lowercase value.
    """

    SCHEMA = "schema"
    FORMAT = "format"
    RANGE = "range"
    UNIQUENESS = "uniqueness"
    COMPLETENESS = "completeness"
    PATTERN = "pattern"
    CUSTOM = "custom"
92
+
93
+
94
+ # =============================================================================
95
+ # Quality Metrics
96
+ # =============================================================================
97
+
98
+
99
@dataclass(frozen=True)
class ConfusionMatrix:
    """Immutable tally of the four outcomes of a binary rule evaluation.

    Every derived ratio returns ``0.0`` instead of raising when its
    denominator is zero.
    """

    true_positives: int = 0
    true_negatives: int = 0
    false_positives: int = 0
    false_negatives: int = 0

    @property
    def total(self) -> int:
        """Number of observations across all four cells."""
        positives = self.true_positives + self.true_negatives
        return positives + self.false_positives + self.false_negatives

    @property
    def accuracy(self) -> float:
        """Share of observations that were classified correctly."""
        observed = self.total
        if observed == 0:
            return 0.0
        return (self.true_positives + self.true_negatives) / observed

    @property
    def precision(self) -> float:
        """Positive predictive value: TP / (TP + FP)."""
        flagged = self.true_positives + self.false_positives
        return 0.0 if flagged == 0 else self.true_positives / flagged

    @property
    def recall(self) -> float:
        """Sensitivity / true-positive rate: TP / (TP + FN)."""
        relevant = self.true_positives + self.false_negatives
        return 0.0 if relevant == 0 else self.true_positives / relevant

    @property
    def specificity(self) -> float:
        """True-negative rate: TN / (TN + FP)."""
        negatives = self.true_negatives + self.false_positives
        return 0.0 if negatives == 0 else self.true_negatives / negatives

    @property
    def f1_score(self) -> float:
        """Harmonic mean of precision and recall."""
        prec = self.precision
        rec = self.recall
        if prec + rec == 0:
            return 0.0
        return 2 * (prec * rec) / (prec + rec)

    @property
    def f_beta(self) -> Callable[[float], float]:
        """Return a function that computes the F-beta score for a given beta.

        This is a property yielding a callable, so it is used as
        ``matrix.f_beta(2.0)``.
        """

        def score_for(beta: float) -> float:
            prec = self.precision
            rec = self.recall
            if prec + rec == 0:
                return 0.0
            beta_sq = beta ** 2
            return (1 + beta_sq) * (prec * rec) / (beta_sq * prec + rec)

        return score_for

    @property
    def mcc(self) -> float:
        """Matthews Correlation Coefficient (``0.0`` when undefined)."""
        tp = self.true_positives
        tn = self.true_negatives
        fp = self.false_positives
        fn = self.false_negatives

        scale = math.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
        if scale == 0:
            return 0.0
        return (tp * tn - fp * fn) / scale

    def to_dict(self) -> dict[str, Any]:
        """Serialise the raw counts followed by the derived metrics."""
        exported = (
            "true_positives",
            "true_negatives",
            "false_positives",
            "false_negatives",
            "accuracy",
            "precision",
            "recall",
            "specificity",
            "f1_score",
            "mcc",
        )
        return {key: getattr(self, key) for key in exported}
195
+
196
+
197
@dataclass
class QualityMetrics:
    """Bundle of quality figures describing one evaluated rule.

    Holds the headline scores, their confidence intervals, the sample
    bookkeeping and, when available, the confusion matrix they came from.
    """

    # Headline scores
    precision: float = 0.0
    recall: float = 0.0
    f1_score: float = 0.0
    accuracy: float = 0.0

    # Secondary scores
    specificity: float = 0.0
    mcc: float = 0.0  # Matthews Correlation Coefficient

    # Interval estimates as (lower, upper); from_confusion_matrix fills the
    # precision and recall intervals only, f1_ci keeps its default there.
    precision_ci: tuple[float, float] = (0.0, 0.0)
    recall_ci: tuple[float, float] = (0.0, 0.0)
    f1_ci: tuple[float, float] = (0.0, 0.0)

    # How much data the scores are based on
    sample_size: int = 0
    population_size: int = 0

    # Overall verdict
    quality_level: QualityLevel = QualityLevel.UNACCEPTABLE
    confidence: float = 0.0  # sampled fraction of the population, capped at 1.0

    # Source counts, when the metrics were derived from one
    confusion_matrix: ConfusionMatrix | None = None

    # Bookkeeping
    evaluated_at: datetime = field(default_factory=datetime.now)
    evaluation_duration_ms: float = 0.0

    @classmethod
    def from_confusion_matrix(
        cls,
        matrix: ConfusionMatrix,
        sample_size: int = 0,
        population_size: int = 0,
    ) -> "QualityMetrics":
        """Build metrics from a confusion matrix.

        Args:
            matrix: Outcome counts to derive the scores from.
            sample_size: Number of rows that were evaluated.
            population_size: Number of rows in the full data set.

        Returns:
            A populated instance. Intervals and ``confidence`` are computed
            only when ``sample_size`` is positive; otherwise they keep
            their defaults.
        """
        sampled_fields: dict[str, Any] = {}
        if sample_size > 0:
            hits = matrix.true_positives
            sampled_fields["precision_ci"] = cls._wilson_ci(
                hits, hits + matrix.false_positives
            )
            sampled_fields["recall_ci"] = cls._wilson_ci(
                hits, hits + matrix.false_negatives
            )
            # max(..., 1) guards the division when no population size is given.
            sampled_fields["confidence"] = min(
                1.0, sample_size / max(population_size, 1)
            )

        return cls(
            precision=matrix.precision,
            recall=matrix.recall,
            f1_score=matrix.f1_score,
            accuracy=matrix.accuracy,
            specificity=matrix.specificity,
            mcc=matrix.mcc,
            sample_size=sample_size,
            population_size=population_size,
            quality_level=QualityLevel.from_f1(matrix.f1_score),
            confusion_matrix=matrix,
            **sampled_fields,
        )

    @staticmethod
    def _wilson_ci(successes: int, trials: int, z: float = 1.96) -> tuple[float, float]:
        """Wilson score interval for a binomial proportion.

        Args:
            successes: Number of successful trials.
            trials: Total number of trials.
            z: Normal quantile for the desired level (1.96 by default).

        Returns:
            ``(lower, upper)`` clamped to ``[0, 1]``; ``(0.0, 0.0)`` when
            there are no trials.
        """
        if trials == 0:
            return (0.0, 0.0)

        z_sq = z * z
        share = successes / trials
        scale = 1 + z_sq / trials
        midpoint = share + z_sq / (2 * trials)
        spread = z * math.sqrt((share * (1 - share) + z_sq / (4 * trials)) / trials)

        low = max(0.0, (midpoint - spread) / scale)
        high = min(1.0, (midpoint + spread) / scale)
        return (low, high)

    def to_dict(self) -> dict[str, Any]:
        """Serialise to a plain dictionary.

        ``quality_level`` is exported as its string value, ``evaluated_at``
        as an ISO-8601 string, and ``confusion_matrix`` is included only
        when one is attached.
        """
        copied_as_is = (
            "precision",
            "recall",
            "f1_score",
            "accuracy",
            "specificity",
            "mcc",
            "precision_ci",
            "recall_ci",
            "f1_ci",
            "sample_size",
            "population_size",
        )
        payload: dict[str, Any] = {key: getattr(self, key) for key in copied_as_is}
        payload["quality_level"] = self.quality_level.value
        payload["confidence"] = self.confidence
        payload["evaluated_at"] = self.evaluated_at.isoformat()
        payload["evaluation_duration_ms"] = self.evaluation_duration_ms

        if self.confusion_matrix:
            payload["confusion_matrix"] = self.confusion_matrix.to_dict()

        return payload
311
+
312
+
313
+ # =============================================================================
314
+ # Rule Protocol
315
+ # =============================================================================
316
+
317
+
318
class RuleProtocol(Protocol):
    """Structural interface a validation rule must provide.

    Attributes:
        name: Name of the rule.
        rule_type: Category of the rule.
        column: Column the rule targets, or ``None``.
    """

    name: str
    rule_type: RuleType
    column: str | None

    def validate(self, value: Any) -> bool:
        """Report whether a single value satisfies the rule."""
        ...

    def validate_column(self, df: pl.DataFrame, column: str) -> pl.Series:
        """Check one column of ``df`` and return a boolean series."""
        ...
332
+
333
+
334
@dataclass
class ValidationRule:
    """Simple, declarative validation rule.

    A rule is configured with at most one "kind" of check. Precedence,
    shared by :meth:`validate` and :meth:`validate_column`:

    1. ``None``/null values are judged by ``nullable`` alone.
    2. ``validate_fn``, when set, decides everything else.
    3. Otherwise ``pattern``, when set (non-empty), decides.
    4. Otherwise ``min_value``, ``max_value`` and ``allowed_values`` must
       all hold (each one is skipped when ``None``).

    Attributes:
        name: Name of the rule.
        rule_type: Category of the rule.
        column: Column the rule targets, if any.
        pattern: Regular expression the value must match.
        min_value: Inclusive lower bound.
        max_value: Inclusive upper bound.
        allowed_values: Set of accepted values.
        nullable: Whether ``None``/null counts as valid.
        validate_fn: Custom predicate applied to non-null values.
    """

    name: str
    rule_type: RuleType
    column: str | None = None
    pattern: str | None = None
    min_value: float | None = None
    max_value: float | None = None
    allowed_values: set[Any] | None = None
    nullable: bool = True
    validate_fn: Callable[[Any], bool] | None = None

    def validate(self, value: Any) -> bool:
        """Validate a single value.

        Args:
            value: Value to check.

        Returns:
            ``True`` when the value satisfies the rule.
        """
        if value is None:
            return self.nullable

        if self.validate_fn is not None:
            return bool(self.validate_fn(value))

        if self.pattern:
            # Only strings can match a pattern; re.match anchors at the start.
            return isinstance(value, str) and bool(re.match(self.pattern, value))

        if self.min_value is not None and value < self.min_value:
            return False

        if self.max_value is not None and value > self.max_value:
            return False

        if self.allowed_values is not None and value not in self.allowed_values:
            return False

        return True

    def validate_column(self, df: pl.DataFrame, column: str) -> pl.Series:
        """Validate a column, returning a boolean series.

        Nulls are valid exactly when ``nullable`` is true; non-null values
        are valid exactly when they pass the configured check. Earlier
        logic mixed the null mask into the check, so non-null in-range
        values were rejected for nullable rules, and every non-null value
        was accepted for non-nullable pattern / allowed-value rules.

        Args:
            df: Frame holding the data.
            column: Name of the column to check.

        Returns:
            Boolean series with one entry per row (``True`` = valid).
        """
        col = df.get_column(column)
        is_null = col.is_null()

        if self.validate_fn is not None:
            # An arbitrary Python predicate cannot be vectorised; reuse the
            # scalar path so both entry points agree.
            return pl.Series(
                col.name,
                [self.validate(value) for value in col.to_list()],
                dtype=pl.Boolean,
            )

        # Non-null rows start out passing; each configured check narrows
        # the mask. fill_null(False) keeps null comparisons from leaking
        # into the result.
        passes = ~is_null

        if self.pattern:
            # NOTE(review): str.contains searches anywhere in the string
            # and uses polars' regex engine, while validate() uses the
            # start-anchored re.match and rejects non-strings - confirm
            # which semantics callers expect before unifying.
            matches = col.cast(pl.Utf8).str.contains(self.pattern)
            passes = passes & matches.fill_null(False)
        else:
            if self.min_value is not None:
                passes = passes & (col >= self.min_value).fill_null(False)
            if self.max_value is not None:
                passes = passes & (col <= self.max_value).fill_null(False)
            if self.allowed_values is not None:
                allowed = col.is_in(list(self.allowed_values))
                passes = passes & allowed.fill_null(False)

        if self.nullable:
            return is_null | passes
        return passes
399
+
400
+
401
+ # =============================================================================
402
+ # Quality Estimator Protocol
403
+ # =============================================================================
404
+
405
+
406
class QualityEstimator(ABC):
    """Common base for strategies that score a rule's quality.

    Concrete estimators choose their own approach (sampling, heuristics,
    and so on) and identify themselves through the ``name`` attribute.
    """

    name: str = "base"

    @abstractmethod
    def estimate(
        self,
        rule: RuleProtocol,
        data: pl.DataFrame,
        ground_truth: pl.Series | None = None,
    ) -> QualityMetrics:
        """Produce quality metrics for ``rule`` evaluated on ``data``.

        Args:
            rule: The rule under evaluation.
            data: Frame the rule is evaluated against.
            ground_truth: Known-correct labels, when available.

        Returns:
            The estimated quality metrics.
        """
433
+
434
+
435
+ class SamplingQualityEstimator(QualityEstimator):
436
+ """Estimates quality using statistical sampling.
437
+
438
+ Uses random sampling to estimate precision and recall
439
+ with confidence intervals.
440
+ """
441
+
442
+ name = "sampling"
443
+
444
+ def __init__(
445
+ self,
446
+ sample_size: int = 1000,
447
+ confidence_level: float = 0.95,
448
+ random_seed: int | None = None,
449
+ ):
450
+ self.sample_size = sample_size
451
+ self.confidence_level = confidence_level
452
+ self.random_seed = random_seed
453
+
454
+ def estimate(
455
+ self,
456
+ rule: RuleProtocol,
457
+ data: pl.DataFrame,
458
+ ground_truth: pl.Series | None = None,
459
+ ) -> QualityMetrics:
460
+ """Estimate quality via sampling."""
461
+ start_time = datetime.now()
462
+
463
+ column = rule.column
464
+ if column is None or column not in data.columns:
465
+ return QualityMetrics()
466
+
467
+ # Sample data if needed
468
+ population_size = len(data)
469
+ if population_size > self.sample_size:
470
+ if self.random_seed is not None:
471
+ random.seed(self.random_seed)
472
+ indices = random.sample(range(population_size), self.sample_size)
473
+ sample = data[indices]
474
+ sample_size = self.sample_size
475
+ else:
476
+ sample = data
477
+ sample_size = population_size
478
+
479
+ # Validate sample
480
+ predictions = rule.validate_column(sample, column)
481
+
482
+ # If we have ground truth, calculate confusion matrix
483
+ if ground_truth is not None:
484
+ if len(ground_truth) > self.sample_size:
485
+ gt_sample = ground_truth[indices] if population_size > self.sample_size else ground_truth
486
+ else:
487
+ gt_sample = ground_truth
488
+
489
+ matrix = self._calculate_confusion_matrix(predictions, gt_sample)
490
+ else:
491
+ # Without ground truth, estimate based on data patterns
492
+ matrix = self._estimate_confusion_matrix(predictions, sample, column)
493
+
494
+ duration_ms = (datetime.now() - start_time).total_seconds() * 1000
495
+
496
+ metrics = QualityMetrics.from_confusion_matrix(
497
+ matrix,
498
+ sample_size=sample_size,
499
+ population_size=population_size,
500
+ )
501
+ metrics.evaluation_duration_ms = duration_ms
502
+
503
+ return metrics
504
+
505
def _calculate_confusion_matrix(
    self,
    predictions: pl.Series,
    ground_truth: pl.Series,
) -> ConfusionMatrix:
    """Tally the four confusion-matrix cells from paired series.

    Both series are converted to NumPy arrays and compared position by
    position; each cell is the count of positions matching that
    prediction/truth combination.
    """
    predicted = predictions.to_numpy()
    actual = ground_truth.to_numpy()

    # Element-wise comparisons on arrays: `==` is intentional here, an
    # identity test would not produce a per-element mask.
    predicted_true = predicted == True  # noqa: E712
    predicted_false = predicted == False  # noqa: E712
    actual_true = actual == True  # noqa: E712
    actual_false = actual == False  # noqa: E712

    hits = int((predicted_true & actual_true).sum())
    correct_rejections = int((predicted_false & actual_false).sum())
    false_alarms = int((predicted_true & actual_false).sum())
    misses = int((predicted_false & actual_true).sum())

    return ConfusionMatrix(
        true_positives=hits,
        true_negatives=correct_rejections,
        false_positives=false_alarms,
        false_negatives=misses,
    )
525
+
526
def _estimate_confusion_matrix(
    self,
    predictions: pl.Series,
    data: pl.DataFrame,
    column: str,
) -> ConfusionMatrix:
    """Estimate a confusion matrix without ground truth.

    Applies fixed assumed error rates to the observed valid/invalid counts:
    a small share of "valid" predictions is booked as false positives and a
    small share of "invalid" predictions as false negatives.

    Args:
        predictions: Per-row rule outcome; summed to count valid rows
            (assumes a boolean series — confirm against the rule protocol).
        data: Unused; kept so the signature stays stable.
        column: Unused; kept so the signature stays stable.

    Returns:
        A matrix whose four cells add up to ``len(predictions)``.
    """
    total = len(predictions)
    # `sum()` may yield None for an empty series in some polars versions.
    valid_count = int(predictions.sum() or 0)
    invalid_count = total - valid_count

    # Heuristic: assume most valid predictions are true positives
    # and most invalid predictions are true negatives.
    # This is a simplification - actual FP/FN rates depend on the rule.
    estimated_fp_rate = 0.02  # Conservative estimate
    estimated_fn_rate = 0.05  # Conservative estimate

    # Derive the complementary cell by subtraction so that no observation is
    # lost to independent truncation of each product.
    fp = round(valid_count * estimated_fp_rate)
    tp = valid_count - fp
    fn = round(invalid_count * estimated_fn_rate)
    tn = invalid_count - fn

    return ConfusionMatrix(
        true_positives=tp,
        true_negatives=tn,
        false_positives=fp,
        false_negatives=fn,
    )
559
+
560
+
561
class HeuristicQualityEstimator(QualityEstimator):
    """Estimates quality using heuristics and data patterns.

    Useful when ground truth is not available and sampling
    is not practical. Precision/recall figures are rule-of-thumb values
    derived from the share of rows the rule accepts, the null share and the
    uniqueness of the column; they are not measured against labels.
    """

    name = "heuristic"

    def __init__(self, strictness: float = 0.5):
        # 0.0 = loose, 1.0 = strict. Stored only; no method of this class
        # reads it.
        self.strictness = strictness

    @staticmethod
    def _f1(precision: float, recall: float) -> float:
        """Return the harmonic mean of precision and recall (0 if both are 0)."""
        total = precision + recall
        return 2 * precision * recall / total if total > 0 else 0

    def estimate(
        self,
        rule: RuleProtocol,
        data: pl.DataFrame,
        ground_truth: pl.Series | None = None,
    ) -> QualityMetrics:
        """Estimate quality using heuristics.

        Args:
            rule: Rule to evaluate; its ``rule_type`` picks the heuristic.
            data: Frame to evaluate against (the full frame is validated).
            ground_truth: Accepted for interface compatibility; not used.

        Returns:
            Heuristic metrics, or a default ``QualityMetrics()`` when the
            rule has no column, the column is missing, or there are no rows.
        """
        start_time = datetime.now()

        column = rule.column
        if column is None or column not in data.columns:
            return QualityMetrics()

        col = data.get_column(column)
        predictions = rule.validate_column(data, column)

        # Nothing to measure on an empty frame; the ratios below would
        # otherwise divide by zero.
        if len(col) == 0 or len(predictions) == 0:
            return QualityMetrics()

        # Calculate base metrics
        valid_ratio = predictions.sum() / len(predictions)
        null_ratio = col.null_count() / len(col)
        unique_ratio = col.n_unique() / len(col)

        # Heuristic quality estimation based on rule type
        if rule.rule_type == RuleType.PATTERN:
            metrics = self._estimate_pattern_quality(
                valid_ratio, null_ratio, unique_ratio
            )
        elif rule.rule_type == RuleType.RANGE:
            metrics = self._estimate_range_quality(
                valid_ratio, null_ratio, col
            )
        elif rule.rule_type == RuleType.UNIQUENESS:
            metrics = self._estimate_uniqueness_quality(
                valid_ratio, unique_ratio
            )
        else:
            metrics = self._estimate_general_quality(
                valid_ratio, null_ratio
            )

        duration_ms = (datetime.now() - start_time).total_seconds() * 1000
        metrics.sample_size = len(data)
        metrics.population_size = len(data)
        metrics.evaluation_duration_ms = duration_ms
        metrics.quality_level = QualityLevel.from_f1(metrics.f1_score)

        return metrics

    def _estimate_pattern_quality(
        self,
        valid_ratio: float,
        null_ratio: float,
        unique_ratio: float,
    ) -> QualityMetrics:
        """Estimate quality for pattern rules.

        Only ``valid_ratio`` drives the result; ``null_ratio`` and
        ``unique_ratio`` are accepted but not used.
        """
        if valid_ratio > 0.95:
            # Very high match - might be too loose, so precision is
            # discounted further the closer the match ratio gets to 1.
            precision = 0.85 - (valid_ratio - 0.95) * 2
            recall = 0.95
        elif valid_ratio > 0.80:
            # Good match ratio
            precision = 0.90
            recall = valid_ratio
        else:
            # Low match - might be too strict or wrong pattern
            precision = 0.95
            recall = valid_ratio

        return QualityMetrics(
            precision=precision,
            recall=recall,
            f1_score=self._f1(precision, recall),
            confidence=0.7,  # Heuristic confidence
        )

    def _estimate_range_quality(
        self,
        valid_ratio: float,
        null_ratio: float,
        col: pl.Series,
    ) -> QualityMetrics:
        """Estimate quality for range rules.

        Precision is chosen from the coefficient of variation of the
        non-null values for Float32/Float64/Int32/Int64 columns and falls
        back to 0.90 for other dtypes, all-null columns, or when the
        statistics cannot be computed. ``null_ratio`` is not used.
        """
        try:
            if col.dtype in [pl.Float32, pl.Float64, pl.Int32, pl.Int64]:
                non_null = col.drop_nulls()
                if len(non_null) > 0:
                    std = non_null.std()
                    mean = non_null.mean()
                    # If std is large relative to mean, more uncertainty
                    cv = abs(std / mean) if mean != 0 else 0
                    precision = 0.95 if cv < 0.5 else 0.85
                else:
                    precision = 0.90
            else:
                precision = 0.90
        except Exception:
            # Deliberate best-effort: any failure computing the statistics
            # degrades to the neutral default.
            precision = 0.90

        recall = valid_ratio

        return QualityMetrics(
            precision=precision,
            recall=recall,
            f1_score=self._f1(precision, recall),
            confidence=0.8,
        )

    def _estimate_uniqueness_quality(
        self,
        valid_ratio: float,
        unique_ratio: float,
    ) -> QualityMetrics:
        """Estimate quality for uniqueness rules.

        Driven by ``unique_ratio`` alone; ``valid_ratio`` is not used.
        """
        if unique_ratio > 0.99:
            precision = 0.98
            recall = 0.95
        elif unique_ratio > 0.95:
            precision = 0.90
            recall = 0.90
        else:
            precision = 0.80
            recall = unique_ratio

        return QualityMetrics(
            precision=precision,
            recall=recall,
            f1_score=self._f1(precision, recall),
            confidence=0.85,
        )

    def _estimate_general_quality(
        self,
        valid_ratio: float,
        null_ratio: float,
    ) -> QualityMetrics:
        """Estimate quality for rule types without a dedicated heuristic."""
        precision = 0.90
        # Recall is the valid share discounted by the share of nulls.
        recall = valid_ratio * (1 - null_ratio)

        return QualityMetrics(
            precision=precision,
            recall=recall,
            f1_score=self._f1(precision, recall),
            confidence=0.6,
        )
733
+
734
+
735
class CrossValidationEstimator(QualityEstimator):
    """Estimates quality using cross-validation.

    Splits data into folds and evaluates consistency across folds: the rule
    is run on each fold and the spread of the per-fold valid ratios is used
    as a proxy for confidence and precision.
    """

    name = "cross_validation"

    def __init__(
        self,
        n_folds: int = 5,
        random_seed: int | None = None,
    ):
        self.n_folds = n_folds
        self.random_seed = random_seed

    def estimate(
        self,
        rule: RuleProtocol,
        data: pl.DataFrame,
        ground_truth: pl.Series | None = None,
    ) -> QualityMetrics:
        """Estimate quality via cross-validation.

        Args:
            rule: Rule to evaluate.
            data: Frame to split into folds.
            ground_truth: Accepted for interface compatibility; not used.

        Returns:
            Metrics built from the mean and spread of the per-fold valid
            ratios, or a default ``QualityMetrics()`` when the rule has no
            column, the column is missing, or ``data`` has no rows.
        """
        start_time = datetime.now()

        column = rule.column
        if column is None or column not in data.columns:
            return QualityMetrics()

        n = len(data)
        if n == 0:
            # No rows means no folds to evaluate.
            return QualityMetrics()

        # Never use more folds than rows (and at least one), so every fold
        # contains data and the per-fold ratio is well defined.
        n_folds = max(1, min(self.n_folds, n))
        fold_size = n // n_folds

        # A seeded run uses a private generator so the process-wide random
        # state is left untouched; an unseeded run keeps using the shared
        # module-level generator.
        rng = random if self.random_seed is None else random.Random(self.random_seed)
        indices = list(range(n))
        rng.shuffle(indices)

        # Evaluate on each fold
        fold_metrics: list[float] = []
        for i in range(n_folds):
            start_idx = i * fold_size
            # The last fold absorbs the remainder rows.
            end_idx = start_idx + fold_size if i < n_folds - 1 else n
            fold_indices = indices[start_idx:end_idx]

            fold_data = data[fold_indices]
            predictions = rule.validate_column(fold_data, column)
            valid_ratio = predictions.sum() / len(predictions)
            fold_metrics.append(valid_ratio)

        # Calculate consistency across folds (population standard deviation)
        mean_valid = sum(fold_metrics) / len(fold_metrics)
        std_valid = (
            sum((x - mean_valid) ** 2 for x in fold_metrics) / len(fold_metrics)
        ) ** 0.5

        # Low variance = high consistency = likely high precision
        consistency = 1.0 - min(1.0, std_valid * 5)

        precision = 0.85 + consistency * 0.10
        recall = mean_valid
        f1 = (
            2 * precision * recall / (precision + recall)
            if (precision + recall) > 0
            else 0
        )

        duration_ms = (datetime.now() - start_time).total_seconds() * 1000

        return QualityMetrics(
            precision=precision,
            recall=recall,
            f1_score=f1,
            confidence=consistency,
            sample_size=n,
            population_size=n,
            quality_level=QualityLevel.from_f1(f1),
            evaluation_duration_ms=duration_ms,
        )
809
+
810
+
811
+ # =============================================================================
812
+ # Quality Estimator Registry
813
+ # =============================================================================
814
+
815
+
816
class QualityEstimatorRegistry:
    """Name-to-class lookup table for building quality estimators on demand."""

    def __init__(self) -> None:
        # Maps a registered name to the estimator class it instantiates.
        self._estimators: dict[str, type[QualityEstimator]] = {}

    def register(
        self,
        name: str,
        estimator_class: type[QualityEstimator],
    ) -> None:
        """Store ``estimator_class`` under ``name``, replacing any previous entry."""
        self._estimators.update({name: estimator_class})

    def create(self, name: str, **kwargs: Any) -> QualityEstimator:
        """Instantiate the estimator registered as ``name``.

        Args:
            name: Registered estimator name.
            **kwargs: Passed straight to the estimator class constructor.

        Raises:
            KeyError: If nothing is registered under ``name``.
        """
        known = self._estimators
        if name in known:
            factory = known[name]
            return factory(**kwargs)

        raise KeyError(
            f"Unknown estimator: {name}. "
            f"Available: {list(self._estimators.keys())}"
        )

    def list_estimators(self) -> list[str]:
        """Return the registered names in registration order."""
        return [registered for registered in self._estimators]
842
+
843
+
844
# Module-wide registry, pre-populated with the estimators defined above.
quality_estimator_registry = QualityEstimatorRegistry()
quality_estimator_registry.register(
    name="sampling",
    estimator_class=SamplingQualityEstimator,
)
quality_estimator_registry.register(
    name="heuristic",
    estimator_class=HeuristicQualityEstimator,
)
quality_estimator_registry.register(
    name="cross_validation",
    estimator_class=CrossValidationEstimator,
)
849
+
850
+
851
+ # =============================================================================
852
+ # Rule Quality Scorer
853
+ # =============================================================================
854
+
855
+
856
@dataclass
class ScoringConfig:
    """Bundle of settings for quality scoring.

    NOTE(review): none of the neighbouring definitions read this class, so
    the per-field notes below are inferred from the field names only —
    confirm against the consumer.
    """

    # Registry name of the estimator to use.
    estimator: str = "sampling"
    # Keyword options for the estimator; a fresh dict per instance.
    estimator_options: dict[str, Any] = field(default_factory=lambda: {})
    # Presumably the smallest sample considered meaningful.
    min_sample_size: int = 100
    # Presumably the confidence floor for acting on a score.
    min_confidence: float = 0.5
    # Presumably toggles result caching.
    cache_results: bool = True
    # Presumably the cache entry lifetime, in seconds.
    cache_ttl_seconds: int = 3600
866
+
867
+
868
@dataclass
class RuleQualityScore:
    """Outcome of scoring one rule: its identity, metrics and the verdict."""

    rule_name: str
    rule_type: RuleType
    column: str | None
    metrics: QualityMetrics
    recommendation: str
    should_use: bool
    alternatives: list[str] = field(default_factory=list)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of the score.

        The rule type is reduced to its ``value`` and the metrics are
        expanded through their own ``to_dict``.
        """
        return dict(
            rule_name=self.rule_name,
            rule_type=self.rule_type.value,
            column=self.column,
            metrics=self.metrics.to_dict(),
            recommendation=self.recommendation,
            should_use=self.should_use,
            alternatives=self.alternatives,
        )
891
+
892
+
893
class RuleQualityScorer:
    """Main interface for scoring rule quality.

    Evaluates rules against data and provides quality recommendations.
    Results are memoised per scorer instance; see ``_make_cache_key`` for
    what distinguishes two cache entries.

    Example:
        scorer = RuleQualityScorer()

        score = scorer.score(rule, data)
        print(f"Should use: {score.should_use}")
        print(f"Recommendation: {score.recommendation}")
    """

    def __init__(
        self,
        estimator: str | QualityEstimator = "sampling",
        estimator_options: dict[str, Any] | None = None,
        min_confidence: float = 0.5,
        quality_threshold: float = 0.70,
    ):
        """Initialize scorer.

        Args:
            estimator: Estimator name or instance
            estimator_options: Options for estimator construction (ignored
                when an estimator instance is passed)
            min_confidence: Minimum confidence for recommendations
            quality_threshold: Minimum F1 score for rule acceptance

        Raises:
            KeyError: If ``estimator`` is a name the registry does not know.
        """
        self.min_confidence = min_confidence
        self.quality_threshold = quality_threshold
        self._cache: dict[str, RuleQualityScore] = {}
        self._lock = threading.Lock()

        if isinstance(estimator, QualityEstimator):
            self._estimator = estimator
        else:
            options = estimator_options or {}
            self._estimator = quality_estimator_registry.create(estimator, **options)

    def score(
        self,
        rule: RuleProtocol | ValidationRule,
        data: pl.DataFrame,
        ground_truth: pl.Series | None = None,
        use_cache: bool = True,
    ) -> RuleQualityScore:
        """Score a rule's quality.

        Args:
            rule: Rule to score
            data: Data to evaluate against
            ground_truth: Optional ground truth labels
            use_cache: Whether to use cached results

        Returns:
            Complete quality score
        """
        # The key distinguishes labelled from unlabelled runs so a heuristic
        # score is never served in place of a measured one (or vice versa).
        cache_key = self._make_cache_key(rule, data, ground_truth)
        if use_cache:
            with self._lock:
                if cache_key in self._cache:
                    return self._cache[cache_key]

        # Estimate metrics (outside the lock: this is the slow part)
        metrics = self._estimator.estimate(rule, data, ground_truth)

        # Generate recommendation
        recommendation, should_use = self._generate_recommendation(metrics, rule)

        # Create score
        score = RuleQualityScore(
            rule_name=rule.name,
            rule_type=rule.rule_type,
            column=rule.column,
            metrics=metrics,
            recommendation=recommendation,
            should_use=should_use,
        )

        # Cache result
        if use_cache:
            with self._lock:
                self._cache[cache_key] = score

        return score

    def score_all(
        self,
        rules: list[RuleProtocol | ValidationRule],
        data: pl.DataFrame,
        ground_truth: pl.Series | None = None,
    ) -> list[RuleQualityScore]:
        """Score multiple rules.

        Args:
            rules: Rules to score
            data: Data to evaluate against
            ground_truth: Optional ground truth labels

        Returns:
            List of quality scores, in the order of ``rules``
        """
        return [self.score(rule, data, ground_truth) for rule in rules]

    def compare(
        self,
        rules: list[RuleProtocol | ValidationRule],
        data: pl.DataFrame,
    ) -> list[RuleQualityScore]:
        """Compare multiple rules and rank by quality.

        Args:
            rules: Rules to compare
            data: Data to evaluate against

        Returns:
            Scores sorted by F1 score (best first)
        """
        scores = self.score_all(rules, data)
        return sorted(scores, key=lambda s: s.metrics.f1_score, reverse=True)

    def _generate_recommendation(
        self,
        metrics: QualityMetrics,
        rule: RuleProtocol | ValidationRule,
    ) -> tuple[str, bool]:
        """Turn metrics into a ``(message, should_use)`` verdict.

        Low confidence rejects the rule outright. Otherwise the rule is
        accepted when its F1 reaches ``self.quality_threshold``; below the
        threshold the message points at whichever of precision/recall is the
        weaker one. ``rule`` is not consulted.
        """
        f1 = metrics.f1_score
        precision = metrics.precision
        recall = metrics.recall
        confidence = metrics.confidence

        # Check confidence
        if confidence < self.min_confidence:
            return (
                f"Low confidence ({confidence:.0%}). "
                "Consider collecting more data or using ground truth validation.",
                False,
            )

        # Check quality threshold
        if f1 >= self.quality_threshold:
            if f1 >= 0.95:
                return f"Excellent rule quality (F1={f1:.2%}). Safe to use.", True
            elif f1 >= 0.85:
                return f"Good rule quality (F1={f1:.2%}). Recommended for use.", True
            else:
                return f"Acceptable quality (F1={f1:.2%}). Monitor for issues.", True

        # Below threshold - provide specific advice
        if precision < recall:
            return (
                f"Low precision ({precision:.0%}). Rule may be too permissive. "
                "Consider stricter constraints.",
                False,
            )
        elif recall < precision:
            return (
                f"Low recall ({recall:.0%}). Rule may be too strict. "
                "Consider relaxing constraints or checking for edge cases.",
                False,
            )
        else:
            return (
                f"Poor overall quality (F1={f1:.2%}). "
                "Consider redesigning the rule or checking data quality.",
                False,
            )

    def _make_cache_key(
        self,
        rule: RuleProtocol | ValidationRule,
        data: pl.DataFrame,
        ground_truth: pl.Series | None = None,
    ) -> str:
        """Create cache key for a rule + data (+ ground truth) combination.

        The data part fingerprints only the row count and column names, not
        the cell values: two frames with the same shape and columns share a
        key. Pass ``use_cache=False`` to ``score`` (or call ``clear_cache``)
        when the contents may have changed.

        Args:
            rule: Rule being scored; name, type and column go into the key.
            data: Frame being scored.
            ground_truth: Labels passed to ``score``, if any. Their presence
                and length are appended so labelled and unlabelled runs do
                not share a cache entry.
        """
        rule_str = f"{rule.name}:{rule.rule_type}:{rule.column}"
        data_hash = hashlib.sha256(
            f"{len(data)}:{data.columns}".encode()
        ).hexdigest()[:16]
        key = f"{rule_str}:{data_hash}"
        if ground_truth is not None:
            key = f"{key}:gt{len(ground_truth)}"
        return key

    def clear_cache(self) -> None:
        """Clear the score cache."""
        with self._lock:
            self._cache.clear()
1079
+
1080
+
1081
+ # =============================================================================
1082
+ # Quality Trend Analyzer
1083
+ # =============================================================================
1084
+
1085
+
1086
@dataclass
class QualityTrendPoint:
    """One recorded quality measurement in a rule's history."""

    # When the measurement was taken.
    timestamp: datetime
    # Metrics captured at that moment.
    metrics: QualityMetrics
    # Number of rows that were evaluated.
    data_size: int
    # Free-form annotation; empty when none was given.
    notes: str = str()
1094
+
1095
+
1096
class QualityTrendAnalyzer:
    """Analyzes quality trends over time.

    Tracks how rule quality changes as data evolves. Measurements are kept
    in memory (the latest 100 per rule) and, when a storage path is given,
    mirrored to a JSON file after every ``record`` call.

    Example:
        analyzer = QualityTrendAnalyzer()

        # Record quality over time
        analyzer.record(rule_name, metrics1, datetime.now())
        analyzer.record(rule_name, metrics2, datetime.now())

        # Analyze trend
        trend = analyzer.analyze_trend(rule_name)
        print(f"Quality is {trend['trend']}")
    """

    def __init__(self, storage_path: str | Path | None = None):
        """Create an analyzer, loading existing history if the file exists.

        Args:
            storage_path: JSON file used for persistence; ``None`` keeps the
                history in memory only.
        """
        self.storage_path = Path(storage_path) if storage_path else None
        self._trends: dict[str, list[QualityTrendPoint]] = defaultdict(list)
        self._lock = threading.Lock()

        if self.storage_path and self.storage_path.exists():
            self._load()

    def record(
        self,
        rule_name: str,
        metrics: QualityMetrics,
        timestamp: datetime | None = None,
        data_size: int = 0,
        notes: str = "",
    ) -> None:
        """Record a quality measurement.

        Args:
            rule_name: Name of the rule
            metrics: Quality metrics
            timestamp: When measured (defaults to now)
            data_size: Size of data evaluated
            notes: Optional notes
        """
        point = QualityTrendPoint(
            timestamp=timestamp or datetime.now(),
            metrics=metrics,
            data_size=data_size,
            notes=notes,
        )

        with self._lock:
            self._trends[rule_name].append(point)
            # Keep last 100 points per rule
            if len(self._trends[rule_name]) > 100:
                self._trends[rule_name] = self._trends[rule_name][-100:]

        # Persist outside the lock; _save takes the lock itself.
        if self.storage_path:
            self._save()

    def analyze_trend(
        self,
        rule_name: str,
        window_days: int = 30,
    ) -> dict[str, Any]:
        """Analyze quality trend for a rule.

        The points inside the window are split into an earlier and a later
        half (in recording order); the difference between the halves' mean
        F1 decides the direction, with a +/-0.05 dead band for "stable".

        Args:
            rule_name: Name of the rule
            window_days: Days to analyze

        Returns:
            Trend analysis results. ``{"error": ...}`` when the rule has no
            history; a ``"trend": "insufficient_data"`` result when fewer
            than two points fall inside the window.
        """
        with self._lock:
            # Snapshot so later records cannot change the list mid-analysis.
            points = list(self._trends.get(rule_name, []))

        if not points:
            return {"error": "No data available"}

        # Filter to window
        cutoff = datetime.now() - timedelta(days=window_days)
        recent = [p for p in points if p.timestamp > cutoff]

        if len(recent) < 2:
            return {
                "current": points[-1].metrics.to_dict(),
                "trend": "insufficient_data",
            }

        # Calculate trend
        f1_values = [p.metrics.f1_score for p in recent]
        midpoint = len(f1_values) // 2  # >= 1 because len(recent) >= 2
        first_half = sum(f1_values[:midpoint]) / midpoint
        second_half = sum(f1_values[midpoint:]) / (len(f1_values) - midpoint)

        change = second_half - first_half
        if change > 0.05:
            direction = "improving"
        elif change < -0.05:
            direction = "degrading"
        else:
            direction = "stable"

        return {
            "current": recent[-1].metrics.to_dict(),
            "trend": direction,
            "change": change,
            "points_analyzed": len(recent),
            "oldest_point": recent[0].timestamp.isoformat(),
            "newest_point": recent[-1].timestamp.isoformat(),
            "f1_min": min(f1_values),
            "f1_max": max(f1_values),
            "f1_mean": sum(f1_values) / len(f1_values),
        }

    def get_history(
        self,
        rule_name: str,
        limit: int = 50,
    ) -> list[dict[str, Any]]:
        """Get quality history for a rule.

        Args:
            rule_name: Name of the rule
            limit: Maximum points to return (the most recent ones). A value
                of zero or less yields an empty list.

        Returns:
            List of historical measurements, oldest first
        """
        # `points[-0:]` would be the whole list, so handle this explicitly.
        if limit <= 0:
            return []

        with self._lock:
            points = list(self._trends.get(rule_name, []))

        return [
            {
                "timestamp": p.timestamp.isoformat(),
                "metrics": p.metrics.to_dict(),
                "data_size": p.data_size,
                "notes": p.notes,
            }
            for p in points[-limit:]
        ]

    def _save(self) -> None:
        """Write the full history to ``self.storage_path`` as JSON."""
        if not self.storage_path:
            return

        data = {}
        with self._lock:
            for rule_name, points in self._trends.items():
                data[rule_name] = [
                    {
                        "timestamp": p.timestamp.isoformat(),
                        "metrics": p.metrics.to_dict(),
                        "data_size": p.data_size,
                        "notes": p.notes,
                    }
                    for p in points
                ]

        with open(self.storage_path, "w", encoding="utf-8") as f:
            json.dump(data, f)

    def _load(self) -> None:
        """Load history from ``self.storage_path``, if it exists.

        Loading is best-effort: a missing, unreadable or malformed file
        leaves the in-memory history unchanged and is reported through a
        log warning instead of an exception. Only metric keys that are
        ``QualityMetrics`` dataclass fields are restored, and the
        ``confusion_matrix`` entry is skipped.
        """
        import logging

        if not self.storage_path or not self.storage_path.exists():
            return

        try:
            with open(self.storage_path, encoding="utf-8") as f:
                data = json.load(f)

            # Build everything first so a bad entry cannot leave a
            # half-loaded history behind.
            loaded: dict[str, list[QualityTrendPoint]] = {}
            for rule_name, points in data.items():
                loaded[rule_name] = [
                    QualityTrendPoint(
                        timestamp=datetime.fromisoformat(p["timestamp"]),
                        metrics=QualityMetrics(**{
                            k: v for k, v in p["metrics"].items()
                            if k in QualityMetrics.__dataclass_fields__
                            and k != "confusion_matrix"
                        }),
                        data_size=p.get("data_size", 0),
                        notes=p.get("notes", ""),
                    )
                    for p in points
                ]
        except Exception as exc:
            logging.getLogger(__name__).warning(
                "Could not load quality trends from %s: %s",
                self.storage_path,
                exc,
            )
            return

        self._trends.update(loaded)
1282
+
1283
+
1284
+ # =============================================================================
1285
+ # Convenience Functions
1286
+ # =============================================================================
1287
+
1288
+
1289
def estimate_quality(
    rule: RuleProtocol | ValidationRule,
    data: pl.DataFrame,
    estimator: str = "sampling",
    **kwargs: Any,
) -> QualityMetrics:
    """Build the named estimator and run it once on ``rule`` and ``data``.

    Args:
        rule: Rule to evaluate
        data: Data to evaluate against
        estimator: Registry name of the estimator to build
        **kwargs: Forwarded to the estimator's constructor

    Returns:
        Quality metrics produced by the estimator (no ground truth is passed)
    """
    return quality_estimator_registry.create(estimator, **kwargs).estimate(rule, data)
1308
+
1309
+
1310
def score_rule(
    rule: RuleProtocol | ValidationRule,
    data: pl.DataFrame,
    **kwargs: Any,
) -> RuleQualityScore:
    """Score one rule with a throwaway ``RuleQualityScorer``.

    Args:
        rule: Rule to score
        data: Data to evaluate against
        **kwargs: Forwarded to the ``RuleQualityScorer`` constructor

    Returns:
        Complete quality score
    """
    return RuleQualityScorer(**kwargs).score(rule, data)
1327
+
1328
+
1329
def compare_rules(
    rules: list[RuleProtocol | ValidationRule],
    data: pl.DataFrame,
    **kwargs: Any,
) -> list[RuleQualityScore]:
    """Rank several rules with a throwaway ``RuleQualityScorer``.

    Args:
        rules: Rules to compare
        data: Data to evaluate against
        **kwargs: Forwarded to the ``RuleQualityScorer`` constructor

    Returns:
        Scores as ordered by ``RuleQualityScorer.compare`` (best first)
    """
    return RuleQualityScorer(**kwargs).compare(rules, data)