truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877) hide show
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,1658 @@
1
+ """Enterprise features for validators.
2
+
3
+ This module provides production-ready integrations:
4
+ - #14: Audit logging integration (who/when/what)
5
+ - #15: Metrics collection (Prometheus/StatsD)
6
+ - #16: Reference data caching
7
+ - #17: Parallel processing support
8
+ - #18: Configuration validation
9
+ - #19: Polars version compatibility
10
+ - #20: Internationalization support
11
+
12
+ These features integrate with the existing audit, observability, and cache modules.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ import functools
18
+ import hashlib
19
+ import locale
20
+ import os
21
+ import sys
22
+ import threading
23
+ import time
24
+ import warnings
25
+ from abc import ABC, abstractmethod
26
+ from concurrent.futures import ThreadPoolExecutor, ProcessPoolExecutor, as_completed
27
+ from contextlib import contextmanager
28
+ from dataclasses import dataclass, field
29
+ from datetime import datetime, timedelta
30
+ from enum import Enum
31
+ from pathlib import Path
32
+ from typing import (
33
+ Any,
34
+ Callable,
35
+ Generic,
36
+ Iterator,
37
+ Literal,
38
+ Mapping,
39
+ Protocol,
40
+ Sequence,
41
+ TypeVar,
42
+ overload,
43
+ )
44
+
45
+ import polars as pl
46
+
47
+ from truthound.types import Severity
48
+ from truthound.validators.base import (
49
+ Validator,
50
+ ValidatorConfig,
51
+ ValidationIssue,
52
+ ValidatorExecutionResult,
53
+ ValidationResult,
54
+ ValidatorLogger,
55
+ ValidationErrorContext,
56
+ GracefulValidator,
57
+ )
58
+
59
+
60
+ # =============================================================================
61
+ # #14: Audit Logging Integration
62
+ # =============================================================================
63
+
64
+
65
def _naive_utcnow() -> datetime:
    """Return the current UTC time as a naive ``datetime``.

    Stand-in for ``datetime.utcnow()``, which is deprecated since
    Python 3.12. The tzinfo is stripped on purpose so the value stays a
    naive UTC timestamp, exactly like the value ``utcnow()`` produced.
    """
    # Local import: the module-level import only brings in datetime/timedelta.
    from datetime import timezone

    return datetime.now(timezone.utc).replace(tzinfo=None)


@dataclass
class ValidationAuditRecord:
    """Audit record for validation operations.

    Tracks who ran a validation, when it ran, what was validated, and the
    outcome. Every field has a default, so an empty record can be created
    with ``ValidationAuditRecord()``.
    """

    # Who
    user_id: str | None = None
    user_name: str | None = None
    service_name: str | None = None
    client_ip: str | None = None

    # When (naive UTC timestamp)
    timestamp: datetime = field(default_factory=_naive_utcnow)
    duration_ms: float = 0.0

    # What
    validator_name: str = ""
    validator_category: str = ""
    data_source: str = ""
    row_count: int = 0
    column_count: int = 0
    columns_validated: tuple[str, ...] = ()

    # Results
    issues_found: int = 0
    severity_counts: dict[str, int] = field(default_factory=dict)
    status: str = "unknown"
    error_message: str | None = None

    # Context
    session_id: str | None = None
    request_id: str | None = None
    environment: str = "unknown"
    metadata: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """Convert the record to a plain dictionary for serialization.

        Returns:
            A new dict with one entry per field. ``timestamp`` is rendered
            with ``isoformat()`` and ``columns_validated`` as a list.
            ``severity_counts`` and ``metadata`` are shallow copies, so
            editing the returned payload does not alter this record.
        """
        return {
            "user_id": self.user_id,
            "user_name": self.user_name,
            "service_name": self.service_name,
            "client_ip": self.client_ip,
            "timestamp": self.timestamp.isoformat(),
            "duration_ms": self.duration_ms,
            "validator_name": self.validator_name,
            "validator_category": self.validator_category,
            "data_source": self.data_source,
            "row_count": self.row_count,
            "column_count": self.column_count,
            "columns_validated": list(self.columns_validated),
            "issues_found": self.issues_found,
            # Copy the mutable containers so callers cannot mutate the record
            # through the serialized form.
            "severity_counts": dict(self.severity_counts),
            "status": self.status,
            "error_message": self.error_message,
            "session_id": self.session_id,
            "request_id": self.request_id,
            "environment": self.environment,
            "metadata": dict(self.metadata),
        }
126
+
127
+
128
class ValidationAuditLogger:
    """Audit logger specifically for validation operations.

    Keeps the most recent ``_max_records`` audit records in memory and, when
    ``truthound.audit`` can be imported, also forwards each validation to
    that audit logger.
    """

    _instance: "ValidationAuditLogger | None" = None
    # Class-level lock: used for singleton creation (via ``cls._lock``) and
    # for guarding ``_records`` (via ``self._lock``).
    _lock = threading.Lock()

    def __init__(self) -> None:
        self._records: list[ValidationAuditRecord] = []
        self._audit_logger: Any = None
        self._enabled = True
        self._max_records = 10000  # In-memory limit
        self.logger = ValidatorLogger("ValidationAuditLogger")

        # Try to integrate with main audit system
        self._init_audit_integration()

    def _init_audit_integration(self) -> None:
        """Initialize integration with truthound.audit if available.

        Any failure (missing module or an error while obtaining the audit
        logger) leaves this logger in standalone mode.
        """
        try:
            from truthound.audit import get_audit_logger, AuditEventType

            self._audit_logger = get_audit_logger()
            self._audit_event_type = AuditEventType
            self.logger.debug("Integrated with truthound.audit system")
        except Exception:  # ImportError is already an Exception subclass
            self._audit_logger = None
            self._audit_event_type = None
            self.logger.debug("truthound.audit not available, using standalone mode")

    @classmethod
    def get_instance(cls) -> "ValidationAuditLogger":
        """Get or create the singleton instance."""
        if cls._instance is None:
            with cls._lock:
                # Re-check under the lock so only one instance is created.
                if cls._instance is None:
                    cls._instance = cls()
        return cls._instance

    def log_validation(
        self,
        validator: Validator,
        lf: pl.LazyFrame,
        result: ValidatorExecutionResult,
        user_id: str | None = None,
        session_id: str | None = None,
        data_source: str | None = None,
        **metadata: Any,
    ) -> ValidationAuditRecord:
        """Log a validation operation.

        Args:
            validator: The validator that was executed
            lf: The LazyFrame that was validated
            result: The execution result
            user_id: Optional user identifier; falls back to the ``USER``
                environment variable for the stored record
            session_id: Optional session identifier
            data_source: Description of data source
            **metadata: Additional metadata stored on the record

        Returns:
            The created audit record. When logging is disabled, an empty
            default record is returned and nothing is stored.
        """
        # Local import: the module-level import only brings in
        # datetime/timedelta.
        from datetime import timezone

        if not self._enabled:
            return ValidationAuditRecord()

        # Collect schema info. Best effort: if either the schema or the row
        # count cannot be obtained, the record falls back to empty/zero.
        try:
            schema = lf.collect_schema()
            columns = schema.names()
            row_count = lf.select(pl.len()).collect().item()
        except Exception:
            columns = []
            row_count = 0

        # Build severity counts
        severity_counts: dict[str, int] = {}
        for issue in result.issues:
            sev = issue.severity.value
            severity_counts[sev] = severity_counts.get(sev, 0) + 1

        # Create audit record
        record = ValidationAuditRecord(
            user_id=user_id or os.environ.get("USER"),
            service_name=os.environ.get("SERVICE_NAME"),
            # Naive UTC, equivalent to the deprecated datetime.utcnow().
            timestamp=datetime.now(timezone.utc).replace(tzinfo=None),
            duration_ms=result.execution_time_ms,
            validator_name=result.validator_name,
            validator_category=getattr(validator, "category", "unknown"),
            data_source=data_source or "unknown",
            row_count=row_count,
            column_count=len(columns),
            columns_validated=tuple(columns),
            issues_found=len(result.issues),
            severity_counts=severity_counts,
            status=result.status.value,
            error_message=(
                result.error_context.message if result.error_context else None
            ),
            session_id=session_id,
            environment=os.environ.get("ENVIRONMENT", "development"),
            metadata=metadata,
        )

        # Store locally, keeping only the newest ``_max_records`` entries
        with self._lock:
            self._records.append(record)
            if len(self._records) > self._max_records:
                self._records = self._records[-self._max_records:]

        # Forward to main audit system if available
        if self._audit_logger:
            try:
                from truthound.audit import AuditResource, AuditActor

                self._audit_logger.log(
                    event_type=self._audit_event_type.READ,
                    action=f"validate_{validator.name}",
                    actor=AuditActor(id=user_id or "system"),
                    resource=AuditResource(
                        id=data_source or "unknown",
                        type="dataset",
                    ),
                )
            except Exception as e:
                # Forwarding is best effort; the local record is already stored.
                self.logger.debug(f"Audit system forwarding skipped: {e}")

        return record

    def get_records(
        self,
        validator_name: str | None = None,
        user_id: str | None = None,
        since: datetime | None = None,
        limit: int = 100,
    ) -> list[ValidationAuditRecord]:
        """Query audit records.

        Args:
            validator_name: Filter by validator name
            user_id: Filter by user ID
            since: Keep only records whose timestamp is at or after this value
            limit: Maximum records to return; a value of zero or less
                yields an empty list

        Returns:
            The newest ``limit`` matching audit records, oldest first
        """
        # ``records[-0:]`` would be the whole list, so a non-positive limit
        # has to be handled before slicing.
        if limit <= 0:
            return []

        with self._lock:
            records = self._records.copy()

        if validator_name:
            records = [r for r in records if r.validator_name == validator_name]
        if user_id:
            records = [r for r in records if r.user_id == user_id]
        if since:
            records = [r for r in records if r.timestamp >= since]

        return records[-limit:]

    def clear(self) -> None:
        """Clear all audit records."""
        with self._lock:
            self._records.clear()

    def enable(self) -> None:
        """Enable audit logging."""
        self._enabled = True

    def disable(self) -> None:
        """Disable audit logging."""
        self._enabled = False
300
+
301
+
302
def get_validation_audit_logger() -> ValidationAuditLogger:
    """Return the shared ValidationAuditLogger, creating it on first use."""
    shared_logger = ValidationAuditLogger.get_instance()
    return shared_logger
305
+
306
+
307
+ # =============================================================================
308
+ # #15: Metrics Collection Integration
309
+ # =============================================================================
310
+
311
+
312
class MetricsCollector:
    """Collects metrics for validation operations.

    Local counters and duration sums are always maintained in memory. When
    ``truthound.observability.metrics`` can be imported, the same events are
    also forwarded to its Counter/Histogram/Gauge objects.
    """

    _instance: "MetricsCollector | None" = None
    # Class-level lock: used for singleton creation (via ``cls._lock``) and
    # for guarding the local stats dicts (via ``self._lock``).
    _lock = threading.Lock()

    def __init__(self) -> None:
        self._metrics_system: Any = None
        self._enabled = True
        self._local_stats: dict[str, float] = {}
        self._local_counts: dict[str, int] = {}
        self.logger = ValidatorLogger("MetricsCollector")

        self._init_metrics_integration()

    def _init_metrics_integration(self) -> None:
        """Initialize metrics backend integration.

        On any failure all metric handles are reset to ``None`` so the
        collector runs with local statistics only.
        """
        try:
            from truthound.observability.metrics import Counter, Histogram, Gauge

            self._validation_counter = Counter(
                "truthound_validations_total",
                "Total number of validations",
                labels=("validator", "status", "category"),
            )
            self._validation_duration = Histogram(
                "truthound_validation_duration_ms",
                "Validation duration in milliseconds",
                labels=("validator", "category"),
            )
            self._issues_counter = Counter(
                "truthound_issues_total",
                "Total issues found",
                labels=("validator", "severity", "category"),
            )
            self._active_validations = Gauge(
                "truthound_active_validations",
                "Currently running validations",
                labels=("category",),
            )
            self._metrics_system = True
            self.logger.debug("Integrated with truthound.observability.metrics")
        except Exception as e:  # ImportError is already an Exception subclass
            self._metrics_system = None
            self._validation_counter = None
            self._validation_duration = None
            self._issues_counter = None
            self._active_validations = None
            self.logger.debug(f"Metrics integration not available: {e}")

    @classmethod
    def get_instance(cls) -> "MetricsCollector":
        """Get or create the singleton instance."""
        if cls._instance is None:
            with cls._lock:
                # Re-check under the lock so only one instance is created.
                if cls._instance is None:
                    cls._instance = cls()
        return cls._instance

    def record_validation(
        self,
        validator_name: str,
        category: str,
        status: str,
        duration_ms: float,
        issues: list[ValidationIssue],
    ) -> None:
        """Record metrics for a validation.

        Does nothing when the collector is disabled.

        Args:
            validator_name: Name of the validator
            category: Validator category
            status: Execution status
            duration_ms: Duration in milliseconds
            issues: List of validation issues
        """
        if not self._enabled:
            return

        # Local stats (always available)
        count_key = f"{validator_name}_{status}"
        duration_key = f"{validator_name}_duration_sum"
        with self._lock:
            self._local_counts[count_key] = self._local_counts.get(count_key, 0) + 1
            self._local_stats[duration_key] = (
                self._local_stats.get(duration_key, 0) + duration_ms
            )

        # Forward to metrics system if available
        if self._metrics_system and self._validation_counter:
            try:
                self._validation_counter.inc(
                    validator=validator_name,
                    status=status,
                    category=category,
                )

                if self._validation_duration:
                    self._validation_duration.observe(
                        duration_ms,
                        validator=validator_name,
                        category=category,
                    )

                if self._issues_counter:
                    for issue in issues:
                        self._issues_counter.inc(
                            validator=validator_name,
                            severity=issue.severity.value,
                            category=category,
                        )
            except Exception as e:
                # Backend failures must not break the validation itself.
                self.logger.debug(f"Metrics recording skipped: {e}")

    @contextmanager
    def track_validation(
        self,
        validator_name: str,
        category: str,
    ) -> Iterator[dict[str, Any]]:
        """Context manager to track validation execution.

        The caller fills ``"status"`` and ``"issues"`` in the yielded dict;
        on exit (including on exception) the elapsed time and those values
        are passed to :meth:`record_validation`.

        Args:
            validator_name: Name of the validator
            category: Validator category

        Yields:
            Dict to store results, pre-filled with
            ``{"status": "unknown", "issues": []}``
        """
        # perf_counter is monotonic, so the measured duration cannot go
        # negative or jump when the system clock is adjusted.
        start_time = time.perf_counter()
        result: dict[str, Any] = {"status": "unknown", "issues": []}

        # Track active validations. Remember whether the increment actually
        # happened so the gauge is never decremented without a matching inc.
        gauge_incremented = False
        if self._metrics_system and self._active_validations:
            try:
                self._active_validations.inc(category=category)
                gauge_incremented = True
            except Exception as e:
                self.logger.debug(f"Active validations gauge increment skipped: {e}")

        try:
            yield result
        finally:
            duration_ms = (time.perf_counter() - start_time) * 1000

            # Decrement active
            if gauge_incremented:
                try:
                    self._active_validations.dec(category=category)
                except Exception as e:
                    self.logger.debug(
                        f"Active validations gauge decrement skipped: {e}"
                    )

            # Record final metrics
            self.record_validation(
                validator_name=validator_name,
                category=category,
                status=result.get("status", "unknown"),
                duration_ms=duration_ms,
                issues=result.get("issues", []),
            )

    def get_stats(self) -> dict[str, Any]:
        """Get local statistics summary.

        Returns:
            Dict with ``"counts"`` and ``"stats"``, each a copy of the
            corresponding internal dict.
        """
        with self._lock:
            return {
                "counts": self._local_counts.copy(),
                "stats": self._local_stats.copy(),
            }
481
+
482
+
483
def get_metrics_collector() -> MetricsCollector:
    """Return the shared MetricsCollector, creating it on first use."""
    shared_collector = MetricsCollector.get_instance()
    return shared_collector
486
+
487
+
488
+ # =============================================================================
489
+ # #16: Reference Data Caching
490
+ # =============================================================================
491
+
492
+
493
@dataclass
class CacheEntry:
    """Entry in the reference data cache."""

    data: pl.LazyFrame | pl.DataFrame
    created_at: datetime
    expires_at: datetime | None  # None means the entry never expires
    hits: int = 0
    size_bytes: int = 0

    def is_expired(self) -> bool:
        """Check if entry has expired.

        Returns:
            ``False`` when ``expires_at`` is ``None``; otherwise ``True``
            once the current UTC time is past ``expires_at``.
        """
        if self.expires_at is None:
            return False

        # Local import: the module-level import only brings in
        # datetime/timedelta.
        from datetime import timezone

        now = datetime.now(timezone.utc)
        if self.expires_at.utcoffset() is None:
            # Naive expiry: compare against naive UTC, matching what the
            # deprecated datetime.utcnow() returned.
            now = now.replace(tzinfo=None)
        # An aware expiry is compared against the aware "now"; mixing naive
        # and aware values would raise TypeError.
        return now > self.expires_at
508
+
509
+
510
+ class ReferentialDataCache:
511
+ """Cache for reference data used in referential integrity checks.
512
+
513
+ Features:
514
+ - LRU eviction policy
515
+ - TTL-based expiration
516
+ - Memory-aware sizing
517
+ - Thread-safe operations
518
+ """
519
+
520
+ _instance: "ReferentialDataCache | None" = None
521
+ _lock = threading.Lock()
522
+
523
+ def __init__(
524
+ self,
525
+ max_entries: int = 100,
526
+ max_size_mb: float = 500,
527
+ default_ttl_seconds: float = 3600,
528
+ ) -> None:
529
+ """Initialize the cache.
530
+
531
+ Args:
532
+ max_entries: Maximum number of entries
533
+ max_size_mb: Maximum total size in MB
534
+ default_ttl_seconds: Default TTL in seconds
535
+ """
536
+ self._cache: dict[str, CacheEntry] = {}
537
+ self._max_entries = max_entries
538
+ self._max_size_bytes = int(max_size_mb * 1024 * 1024)
539
+ self._default_ttl = timedelta(seconds=default_ttl_seconds)
540
+ self._total_size = 0
541
+ self._hits = 0
542
+ self._misses = 0
543
+ self.logger = ValidatorLogger("ReferentialDataCache")
544
+
545
+ @classmethod
546
+ def get_instance(cls) -> "ReferentialDataCache":
547
+ """Get or create singleton instance."""
548
+ if cls._instance is None:
549
+ with cls._lock:
550
+ if cls._instance is None:
551
+ cls._instance = cls()
552
+ return cls._instance
553
+
554
+ def _generate_key(
555
+ self,
556
+ source: str,
557
+ column: str,
558
+ query_hash: str | None = None,
559
+ ) -> str:
560
+ """Generate cache key for reference data."""
561
+ parts = [source, column]
562
+ if query_hash:
563
+ parts.append(query_hash)
564
+ key_str = ":".join(parts)
565
+ return hashlib.sha256(key_str.encode()).hexdigest()[:32]
566
+
567
+ def _estimate_size(self, data: pl.LazyFrame | pl.DataFrame) -> int:
568
+ """Estimate memory size of data in bytes."""
569
+ try:
570
+ if isinstance(data, pl.LazyFrame):
571
+ # Collect schema only
572
+ schema = data.collect_schema()
573
+ return len(schema.names()) * 1000 # Rough estimate
574
+ else:
575
+ return data.estimated_size()
576
+ except Exception:
577
+ return 0
578
+
579
+ def _evict_if_needed(self, new_size: int) -> None:
580
+ """Evict entries if needed to make room."""
581
+ # Evict expired entries first
582
+ expired_keys = [k for k, v in self._cache.items() if v.is_expired()]
583
+ for key in expired_keys:
584
+ self._remove_entry(key)
585
+
586
+ # Check entry count
587
+ while len(self._cache) >= self._max_entries:
588
+ self._evict_lru()
589
+
590
+ # Check size
591
+ while self._total_size + new_size > self._max_size_bytes and self._cache:
592
+ self._evict_lru()
593
+
594
+ def _evict_lru(self) -> None:
595
+ """Evict least recently used entry."""
596
+ if not self._cache:
597
+ return
598
+
599
+ # Find entry with lowest hit count
600
+ lru_key = min(self._cache.keys(), key=lambda k: self._cache[k].hits)
601
+ self._remove_entry(lru_key)
602
+
603
+ def _remove_entry(self, key: str) -> None:
604
+ """Remove an entry from cache."""
605
+ if key in self._cache:
606
+ entry = self._cache.pop(key)
607
+ self._total_size -= entry.size_bytes
608
+
609
+ def get(
610
+ self,
611
+ source: str,
612
+ column: str,
613
+ query_hash: str | None = None,
614
+ ) -> pl.LazyFrame | pl.DataFrame | None:
615
+ """Get cached reference data.
616
+
617
+ Args:
618
+ source: Data source identifier
619
+ column: Reference column name
620
+ query_hash: Optional query hash for filtered data
621
+
622
+ Returns:
623
+ Cached data or None
624
+ """
625
+ key = self._generate_key(source, column, query_hash)
626
+
627
+ with self._lock:
628
+ entry = self._cache.get(key)
629
+ if entry is None:
630
+ self._misses += 1
631
+ return None
632
+
633
+ if entry.is_expired():
634
+ self._remove_entry(key)
635
+ self._misses += 1
636
+ return None
637
+
638
+ entry.hits += 1
639
+ self._hits += 1
640
+ return entry.data
641
+
642
+ def set(
643
+ self,
644
+ source: str,
645
+ column: str,
646
+ data: pl.LazyFrame | pl.DataFrame,
647
+ ttl_seconds: float | None = None,
648
+ query_hash: str | None = None,
649
+ ) -> None:
650
+ """Store reference data in cache.
651
+
652
+ Args:
653
+ source: Data source identifier
654
+ column: Reference column name
655
+ data: Data to cache
656
+ ttl_seconds: Optional TTL override
657
+ query_hash: Optional query hash for filtered data
658
+ """
659
+ key = self._generate_key(source, column, query_hash)
660
+ size = self._estimate_size(data)
661
+
662
+ # Determine expiration
663
+ ttl = timedelta(seconds=ttl_seconds) if ttl_seconds else self._default_ttl
664
+ expires_at = datetime.utcnow() + ttl
665
+
666
+ entry = CacheEntry(
667
+ data=data,
668
+ created_at=datetime.utcnow(),
669
+ expires_at=expires_at,
670
+ size_bytes=size,
671
+ )
672
+
673
+ with self._lock:
674
+ # Remove existing entry if any
675
+ if key in self._cache:
676
+ self._remove_entry(key)
677
+
678
+ # Evict if needed
679
+ self._evict_if_needed(size)
680
+
681
+ # Add new entry
682
+ self._cache[key] = entry
683
+ self._total_size += size
684
+
685
+ def invalidate(
686
+ self,
687
+ source: str | None = None,
688
+ column: str | None = None,
689
+ ) -> int:
690
+ """Invalidate cached entries.
691
+
692
+ Args:
693
+ source: Optional source to match
694
+ column: Optional column to match
695
+
696
+ Returns:
697
+ Number of entries invalidated
698
+ """
699
+ with self._lock:
700
+ if source is None and column is None:
701
+ count = len(self._cache)
702
+ self._cache.clear()
703
+ self._total_size = 0
704
+ return count
705
+
706
+ keys_to_remove = []
707
+ for key in self._cache:
708
+ # This is a simplistic match - in production you'd want
709
+ # to store source/column in the entry for proper matching
710
+ if source and source in key:
711
+ keys_to_remove.append(key)
712
+ elif column and column in key:
713
+ keys_to_remove.append(key)
714
+
715
+ for key in keys_to_remove:
716
+ self._remove_entry(key)
717
+
718
+ return len(keys_to_remove)
719
+
720
+ def get_stats(self) -> dict[str, Any]:
721
+ """Get cache statistics."""
722
+ with self._lock:
723
+ total_requests = self._hits + self._misses
724
+ return {
725
+ "entries": len(self._cache),
726
+ "total_size_mb": self._total_size / (1024 * 1024),
727
+ "max_size_mb": self._max_size_bytes / (1024 * 1024),
728
+ "hits": self._hits,
729
+ "misses": self._misses,
730
+ "hit_rate": self._hits / max(total_requests, 1),
731
+ }
732
+
733
+
734
def get_reference_cache() -> ReferentialDataCache:
    """Return the shared cache from ``ReferentialDataCache.get_instance()``."""
    shared_cache = ReferentialDataCache.get_instance()
    return shared_cache
737
+
738
+
739
+ # =============================================================================
740
+ # #17: Parallel Processing Support
741
+ # =============================================================================
742
+
743
+
744
+ class ParallelExecutionMode(Enum):
745
+ """Execution mode for parallel validation."""
746
+
747
+ SEQUENTIAL = "sequential"
748
+ THREADING = "threading"
749
+ MULTIPROCESSING = "multiprocessing"
750
+
751
+
752
+ @dataclass
753
+ class ParallelExecutionConfig:
754
+ """Configuration for parallel validation execution."""
755
+
756
+ mode: ParallelExecutionMode = ParallelExecutionMode.THREADING
757
+ max_workers: int | None = None # None = auto (CPU count)
758
+ chunk_size: int = 10000
759
+ timeout_seconds: float = 300.0
760
+ fail_fast: bool = False # Stop on first error
761
+
762
+ def get_workers(self) -> int:
763
+ """Get effective worker count."""
764
+ if self.max_workers:
765
+ return self.max_workers
766
+ import os
767
+ return min(os.cpu_count() or 4, 8)
768
+
769
+
770
def _execution_failure(
    validator: Validator,
    exc: Exception,
) -> ValidatorExecutionResult:
    """Build the FAILED result recorded when running ``validator`` raised ``exc``."""
    return ValidatorExecutionResult(
        validator_name=validator.name,
        status=ValidationResult.FAILED,
        issues=[],
        error_context=ValidationErrorContext(
            validator_name=validator.name,
            error_type="execution_error",
            message=str(exc),
            exception=exc,
        ),
    )


def _validate_in_worker(
    validator: Validator,
    df: pl.DataFrame,
) -> ValidatorExecutionResult:
    """Run one validator against ``df`` inside a worker process.

    This lives at module level on purpose: ProcessPoolExecutor pickles the
    submitted callable, and functions nested inside a method cannot be pickled.
    """
    try:
        return validator.validate_safe(df.lazy())
    except Exception as exc:
        return _execution_failure(validator, exc)


class ParallelValidator:
    """Executes multiple validators in parallel.

    Features:
    - Thread and process-based parallelism
    - Chunked processing for large datasets
    - Graceful error handling
    - Progress tracking
    """

    def __init__(
        self,
        validators: list[Validator],
        config: ParallelExecutionConfig | None = None,
    ) -> None:
        """Initialize parallel validator.

        Args:
            validators: List of validators to execute
            config: Execution configuration (defaults to
                ``ParallelExecutionConfig()``)
        """
        self.validators = validators
        self.config = config or ParallelExecutionConfig()
        self.logger = ValidatorLogger("ParallelValidator")

    def validate(
        self,
        lf: pl.LazyFrame,
        progress_callback: Callable[[int, int], None] | None = None,
    ) -> list[ValidatorExecutionResult]:
        """Execute all validators using the configured mode.

        Args:
            lf: LazyFrame to validate
            progress_callback: Optional callback(completed, total)

        Returns:
            List of execution results
        """
        if self.config.mode == ParallelExecutionMode.SEQUENTIAL:
            return self._validate_sequential(lf, progress_callback)
        elif self.config.mode == ParallelExecutionMode.THREADING:
            return self._validate_threaded(lf, progress_callback)
        else:
            return self._validate_multiprocess(lf, progress_callback)

    def _validate_sequential(
        self,
        lf: pl.LazyFrame,
        progress_callback: Callable[[int, int], None] | None,
    ) -> list[ValidatorExecutionResult]:
        """Execute validators one after another, in list order.

        A validator that raises is recorded as a FAILED result. With
        ``fail_fast`` the loop stops after the first FAILED result, whether
        it was returned or produced from an exception; progress is reported
        for that validator before stopping, as in the threaded path.
        """
        results: list[ValidatorExecutionResult] = []
        total = len(self.validators)

        for done, validator in enumerate(self.validators, start=1):
            try:
                result = validator.validate_safe(lf)
            except Exception as exc:
                self.logger.error(f"Validator {validator.name} failed: {exc}")
                result = _execution_failure(validator, exc)

            results.append(result)

            if progress_callback:
                progress_callback(done, total)

            if self.config.fail_fast and result.status == ValidationResult.FAILED:
                break

        return results

    def _validate_threaded(
        self,
        lf: pl.LazyFrame,
        progress_callback: Callable[[int, int], None] | None,
    ) -> list[ValidatorExecutionResult]:
        """Execute validators using a thread pool.

        Results are appended in completion order, which may differ from the
        order of ``self.validators``.
        """
        results: list[ValidatorExecutionResult] = []
        total = len(self.validators)

        def validate_one(validator: Validator) -> ValidatorExecutionResult:
            try:
                return validator.validate_safe(lf)
            except Exception as exc:
                return _execution_failure(validator, exc)

        with ThreadPoolExecutor(max_workers=self.config.get_workers()) as executor:
            futures = [executor.submit(validate_one, v) for v in self.validators]

            for completed, future in enumerate(as_completed(futures), start=1):
                result = future.result()
                results.append(result)

                if progress_callback:
                    progress_callback(completed, total)

                if self.config.fail_fast and result.status == ValidationResult.FAILED:
                    # Drop work that has not started yet; running tasks finish.
                    executor.shutdown(wait=False, cancel_futures=True)
                    break

        return results

    def _validate_multiprocess(
        self,
        lf: pl.LazyFrame,
        progress_callback: Callable[[int, int], None] | None,
    ) -> list[ValidatorExecutionResult]:
        """Execute validators using multiprocessing.

        Note: This requires validators to be picklable. A validator whose
        task cannot be run or returned is recorded as FAILED with the error
        attached. If setting up the run itself fails (collecting the frame,
        creating the pool), execution falls back to threading.
        """
        try:
            # Collect DataFrame for multiprocessing
            df = lf.collect()
            results: list[ValidatorExecutionResult] = []
            total = len(self.validators)

            with ProcessPoolExecutor(max_workers=self.config.get_workers()) as executor:
                futures = {
                    executor.submit(_validate_in_worker, v, df): v
                    for v in self.validators
                }

                for completed, future in enumerate(as_completed(futures), start=1):
                    try:
                        # NOTE(review): futures yielded by as_completed() are
                        # already done, so this timeout never actually waits.
                        result = future.result(timeout=self.config.timeout_seconds)
                    except Exception as exc:
                        result = _execution_failure(futures[future], exc)

                    results.append(result)

                    if progress_callback:
                        progress_callback(completed, total)

            return results

        except Exception as exc:
            self.logger.warning(
                f"Multiprocessing failed, falling back to threading: {exc}"
            )
            return self._validate_threaded(lf, progress_callback)
964
+
965
+
966
+ # =============================================================================
967
+ # #18: Configuration Validation
968
+ # =============================================================================
969
+
970
+
971
@dataclass
class ConfigValidationError:
    """A single problem found while checking a validator configuration."""

    field: str  # name of the offending config attribute
    message: str  # what is wrong with it
    value: Any  # the offending value as found on the config
    suggestion: str | None = None  # optional hint on how to fix it


class ConfigValidator:
    """Checks ``ValidatorConfig`` settings and reports problems early.

    ``validate`` collects every finding; ``validate_or_raise`` turns a
    non-empty finding list into a ``ValueError``.
    """

    @classmethod
    def validate(
        cls,
        config: ValidatorConfig,
        validator_name: str = "",
    ) -> list[ConfigValidationError]:
        """Inspect ``config`` and collect every problem found.

        Checks run in a fixed order: sample_size, mostly, timeout_seconds,
        memory_limit_mb, columns.

        Args:
            config: Configuration to validate
            validator_name: Name of validator for context (not used by the
                checks themselves)

        Returns:
            List of validation errors (empty if valid)
        """
        found: list[ConfigValidationError] = []

        def report(field: str, message: str, value: Any, suggestion: str) -> None:
            found.append(
                ConfigValidationError(
                    field=field,
                    message=message,
                    value=value,
                    suggestion=suggestion,
                )
            )

        # sample_size: negative is invalid, very large is flagged as risky
        sample_size = config.sample_size
        if sample_size < 0:
            report(
                "sample_size",
                "sample_size must be >= 0",
                sample_size,
                "Use sample_size=0 to disable sampling",
            )
        elif sample_size > 10000:
            report(
                "sample_size",
                "sample_size > 10000 may cause memory issues",
                sample_size,
                "Consider using sample_size=100 for typical use cases",
            )

        # mostly: optional ratio that must lie within [0, 1]
        mostly = config.mostly
        if mostly is not None and not (0.0 <= mostly <= 1.0):
            report(
                "mostly",
                "mostly must be in [0.0, 1.0]",
                mostly,
                "Use mostly=0.95 for 95% pass rate",
            )

        # timeout_seconds: optional, must be positive, sub-second is flagged
        timeout = config.timeout_seconds
        if timeout is not None:
            if timeout <= 0:
                report(
                    "timeout_seconds",
                    "timeout_seconds must be > 0",
                    timeout,
                    "Use timeout_seconds=None to disable timeout",
                )
            elif timeout < 1:
                report(
                    "timeout_seconds",
                    "timeout_seconds < 1 may cause false timeouts",
                    timeout,
                    "Use at least timeout_seconds=1",
                )

        # memory_limit_mb: optional, must be positive, tiny limits are flagged
        memory_limit = config.memory_limit_mb
        if memory_limit is not None:
            if memory_limit <= 0:
                report(
                    "memory_limit_mb",
                    "memory_limit_mb must be > 0",
                    memory_limit,
                    "Use memory_limit_mb=None to disable limit",
                )
            elif memory_limit < 10:
                report(
                    "memory_limit_mb",
                    "memory_limit_mb < 10 may be too restrictive",
                    memory_limit,
                    "Use at least memory_limit_mb=100",
                )

        # columns: every listed name must contain non-whitespace text
        for col in config.columns or ():
            if not (col and col.strip()):
                report(
                    "columns",
                    "Column name cannot be empty",
                    col,
                    "Remove empty column names",
                )

        return found

    @classmethod
    def validate_or_raise(
        cls,
        config: ValidatorConfig,
        validator_name: str = "",
    ) -> None:
        """Validate ``config`` and raise if anything was reported.

        Raises:
            ValueError: If configuration is invalid; the message lists each
                finding as ``field: message`` on its own line.
        """
        problems = cls.validate(config, validator_name)
        if not problems:
            return

        subject = validator_name or "validator"
        bullet_lines = "\n".join(f" - {p.field}: {p.message}" for p in problems)
        raise ValueError(f"Invalid configuration for {subject}:\n" + bullet_lines)
1111
+
1112
+
1113
+ # =============================================================================
1114
+ # #19: Polars Version Compatibility
1115
+ # =============================================================================
1116
+
1117
+
1118
@dataclass
class PolarsVersionInfo:
    """Polars version information."""

    major: int
    minor: int
    patch: int
    raw: str  # the unparsed version string the numbers were taken from

    @staticmethod
    def _leading_int(component: str) -> int:
        """Return the integer formed by the leading ASCII digits of ``component``.

        Anything after the digits (``"0rc1"``, ``"3-beta"``) is ignored; a
        component with no leading digit yields 0.
        """
        digits = ""
        for ch in component.strip():
            if not ("0" <= ch <= "9"):
                break
            digits += ch
        return int(digits) if digits else 0

    @classmethod
    def parse(cls, version_str: str) -> "PolarsVersionInfo":
        """Parse a dotted version string into a ``PolarsVersionInfo``.

        Only the first three dot-separated components are used; missing ones
        default to 0. Pre-release or build suffixes attached to a component
        do not raise.

        Args:
            version_str: Version such as ``"1.2.3"`` or ``"1.0.0rc1"``

        Returns:
            Parsed version with ``raw`` set to ``version_str``
        """
        numbers = [cls._leading_int(part) for part in version_str.split(".")[:3]]
        numbers += [0] * (3 - len(numbers))
        major, minor, patch = numbers
        return cls(major=major, minor=minor, patch=patch, raw=version_str)

    @classmethod
    def current(cls) -> "PolarsVersionInfo":
        """Get current Polars version (parsed from ``pl.__version__``)."""
        return cls.parse(pl.__version__)

    def _as_tuple(self) -> tuple[int, int, int]:
        """Return ``(major, minor, patch)`` for ordering comparisons."""
        return (self.major, self.minor, self.patch)

    def __ge__(self, other: tuple[int, int, int]) -> bool:
        """Compare against a ``(major, minor, patch)`` tuple or another instance."""
        if isinstance(other, PolarsVersionInfo):
            other = other._as_tuple()
        return self._as_tuple() >= other

    def __lt__(self, other: tuple[int, int, int]) -> bool:
        """Compare against a ``(major, minor, patch)`` tuple or another instance."""
        if isinstance(other, PolarsVersionInfo):
            other = other._as_tuple()
        return self._as_tuple() < other
1144
+
1145
+
1146
class PolarsCompat:
    """Polars version compatibility layer.

    Provides compatible implementations for API changes between versions.
    """

    # Parsed version, filled in lazily by version() and reused afterwards.
    _version: PolarsVersionInfo | None = None

    @classmethod
    def version(cls) -> PolarsVersionInfo:
        """Get cached Polars version."""
        if cls._version is None:
            cls._version = PolarsVersionInfo.current()
        return cls._version

    @classmethod
    def collect_schema(cls, lf: pl.LazyFrame) -> pl.Schema:
        """Get schema from LazyFrame (compatible across versions).

        Uses ``lf.collect_schema()`` when the method exists and falls back to
        the ``schema`` attribute otherwise. The method is detected on the
        object rather than inferred from a version number, because
        ``collect_schema`` is only present on newer Polars releases (1.0+
        per the Polars API reference) and a lower version gate would call a
        method that does not exist.
        """
        resolver = getattr(lf, "collect_schema", None)
        if callable(resolver):
            return resolver()
        # Older versions
        return lf.schema  # type: ignore

    @classmethod
    def estimated_size(cls, df: pl.DataFrame) -> int:
        """Get estimated size in bytes (compatible across versions).

        If the Polars call fails, a rough ``rows * columns * 8`` figure is
        returned instead.
        """
        v = cls.version()
        try:
            if v >= (0, 19, 0):
                return df.estimated_size()
            return df.estimated_size("b")  # type: ignore
        except Exception:
            # Fallback estimation
            return len(df) * len(df.columns) * 8

    @classmethod
    def str_contains(
        cls,
        expr: pl.Expr,
        pattern: str,
        literal: bool = False,
    ) -> pl.Expr:
        """String contains (compatible across versions).

        Args:
            expr: Expression to test
            pattern: Pattern to look for
            literal: Forwarded as the ``literal`` argument of ``str.contains``
        """
        v = cls.version()
        if v >= (0, 19, 0):
            return expr.str.contains(pattern, literal=literal)
        # Older API: only pass ``literal`` when it was requested.
        if literal:
            return expr.str.contains(pattern, literal=True)  # type: ignore
        return expr.str.contains(pattern)

    @classmethod
    def null_count(cls, lf: pl.LazyFrame, col: str) -> pl.Expr:
        """Count nulls in column (compatible across versions).

        Returns an expression; ``lf`` is accepted for interface symmetry but
        is not used to build it.
        """
        v = cls.version()
        if v >= (0, 18, 0):
            return pl.col(col).null_count()
        return pl.col(col).is_null().sum()

    @classmethod
    def check_min_version(
        cls,
        min_version: tuple[int, int, int],
        feature: str = "",
    ) -> bool:
        """Check if current Polars meets minimum version.

        Emits a ``UserWarning`` (and returns False) when it does not.

        Args:
            min_version: Minimum required version (major, minor, patch)
            feature: Feature name for warning message

        Returns:
            True if version requirement is met
        """
        v = cls.version()
        if v < min_version:
            ver_str = ".".join(map(str, min_version))
            msg = f"Polars {ver_str}+ required"
            if feature:
                msg += f" for {feature}"
            msg += f", current: {v.raw}"
            warnings.warn(msg, UserWarning, stacklevel=2)
            return False
        return True
1235
+
1236
+
1237
+ # =============================================================================
1238
+ # #20: Internationalization Support
1239
+ # =============================================================================
1240
+
1241
+
1242
class Language(Enum):
    """Supported languages for error messages."""

    EN = "en"
    KO = "ko"
    JA = "ja"
    ZH = "zh"
    ES = "es"
    FR = "fr"
    DE = "de"


# Translation dictionaries: translation key -> {language code -> text}.
# Texts containing ``{name}`` placeholders are filled in by ``I18n.t``.
_TRANSLATIONS: dict[str, dict[str, str]] = {
    # Issue types
    "null_values": {
        "en": "Null values found",
        "ko": "null 값 발견",
        "ja": "null値が見つかりました",
        "zh": "发现空值",
        "es": "Valores nulos encontrados",
        "fr": "Valeurs nulles trouvées",
        "de": "Null-Werte gefunden",
    },
    "out_of_range": {
        "en": "Values out of range",
        "ko": "범위를 벗어난 값",
        "ja": "範囲外の値",
        "zh": "超出范围的值",
        "es": "Valores fuera de rango",
        "fr": "Valeurs hors limites",
        "de": "Werte außerhalb des Bereichs",
    },
    "duplicate_values": {
        "en": "Duplicate values found",
        "ko": "중복 값 발견",
        "ja": "重複値が見つかりました",
        "zh": "发现重复值",
        "es": "Valores duplicados encontrados",
        "fr": "Valeurs en double trouvées",
        "de": "Doppelte Werte gefunden",
    },
    "invalid_format": {
        "en": "Invalid format",
        "ko": "잘못된 형식",
        "ja": "無効な形式",
        "zh": "格式无效",
        "es": "Formato inválido",
        "fr": "Format invalide",
        "de": "Ungültiges Format",
    },
    "referential_integrity_violation": {
        "en": "Referential integrity violation",
        "ko": "참조 무결성 위반",
        "ja": "参照整合性違反",
        "zh": "引用完整性违规",
        "es": "Violación de integridad referencial",
        "fr": "Violation d'intégrité référentielle",
        "de": "Referenzielle Integritätsverletzung",
    },
    "schema_mismatch": {
        "en": "Schema mismatch",
        "ko": "스키마 불일치",
        "ja": "スキーマ不一致",
        "zh": "模式不匹配",
        "es": "Desajuste de esquema",
        "fr": "Non-concordance de schéma",
        "de": "Schema-Abweichung",
    },
    # Severity levels
    "critical": {
        "en": "Critical",
        "ko": "심각",
        "ja": "重大",
        "zh": "严重",
        "es": "Crítico",
        "fr": "Critique",
        "de": "Kritisch",
    },
    "high": {
        "en": "High",
        "ko": "높음",
        "ja": "高",
        "zh": "高",
        "es": "Alto",
        "fr": "Élevé",
        "de": "Hoch",
    },
    "medium": {
        "en": "Medium",
        "ko": "중간",
        "ja": "中",
        "zh": "中",
        "es": "Medio",
        "fr": "Moyen",
        "de": "Mittel",
    },
    "low": {
        "en": "Low",
        "ko": "낮음",
        "ja": "低",
        "zh": "低",
        "es": "Bajo",
        "fr": "Faible",
        "de": "Niedrig",
    },
    # Common messages
    "values_found": {
        "en": "{count} values found",
        "ko": "{count}개의 값 발견",
        "ja": "{count}個の値が見つかりました",
        "zh": "发现{count}个值",
        "es": "{count} valores encontrados",
        "fr": "{count} valeurs trouvées",
        "de": "{count} Werte gefunden",
    },
    "column_not_found": {
        "en": "Column '{column}' not found",
        "ko": "'{column}' 컬럼을 찾을 수 없습니다",
        "ja": "'{column}' 列が見つかりません",
        "zh": "未找到'{column}'列",
        "es": "Columna '{column}' no encontrada",
        "fr": "Colonne '{column}' non trouvée",
        "de": "Spalte '{column}' nicht gefunden",
    },
    "validation_passed": {
        "en": "Validation passed",
        "ko": "검증 통과",
        "ja": "検証通過",
        "zh": "验证通过",
        "es": "Validación aprobada",
        "fr": "Validation réussie",
        "de": "Validierung bestanden",
    },
    "validation_failed": {
        "en": "Validation failed",
        "ko": "검증 실패",
        "ja": "検証失敗",
        "zh": "验证失败",
        "es": "Validación fallida",
        "fr": "Validation échouée",
        "de": "Validierung fehlgeschlagen",
    },
}


class I18n:
    """Internationalization support for validation messages.

    Features:
    - Multiple language support
    - Fallback to English
    - Auto-detection from locale
    - Template interpolation

    All state is kept on the class, so the selected language and any custom
    translations are shared by every caller.
    """

    _current_language: Language = Language.EN
    _custom_translations: dict[str, dict[str, str]] = {}

    @classmethod
    def set_language(cls, lang: Language | str) -> None:
        """Set the current language.

        Args:
            lang: Language enum or code string (matched case-insensitively)

        Raises:
            ValueError: If a code string does not name a supported language
        """
        if isinstance(lang, str):
            lang = Language(lang.lower())
        cls._current_language = lang

    @classmethod
    def get_language(cls) -> Language:
        """Get the current language."""
        return cls._current_language

    @classmethod
    def detect_language(cls) -> Language:
        """Detect language from system locale.

        Uses the part of the locale name before the first underscore and
        returns English when the locale is unavailable or unsupported.
        """
        try:
            loc = locale.getlocale()[0]
            if loc:
                code = loc.split("_")[0].lower()
                try:
                    return Language(code)
                except ValueError:
                    pass
        except Exception:
            # Locale lookup is best effort; any failure means "use English".
            pass
        return Language.EN

    @classmethod
    def auto_configure(cls) -> None:
        """Auto-configure language from environment.

        ``TRUTHOUND_LANGUAGE`` wins when it names a supported language;
        otherwise the system locale decides.
        """
        # Check environment variable first
        env_lang = os.environ.get("TRUTHOUND_LANGUAGE")
        if env_lang:
            try:
                cls.set_language(env_lang)
                return
            except ValueError:
                pass

        # Fall back to locale detection
        cls.set_language(cls.detect_language())

    @classmethod
    def add_translations(cls, key: str, translations: dict[str, str]) -> None:
        """Add custom translations.

        A custom entry replaces the built-in entry for the same key entirely.

        Args:
            key: Translation key
            translations: Dict of language code -> text
        """
        cls._custom_translations[key] = translations

    @classmethod
    def t(
        cls,
        key: str,
        lang: Language | None = None,
        **kwargs: Any,
    ) -> str:
        """Translate a key to the specified language.

        Args:
            key: Translation key
            lang: Language (default: current)
            **kwargs: Template interpolation values

        Returns:
            Translated string. Unknown keys are returned unchanged, and a
            template that cannot be formatted with ``kwargs`` is returned
            without interpolation.
        """
        lang = lang or cls._current_language
        lang_code = lang.value

        # Check custom translations first
        if key in cls._custom_translations:
            translations = cls._custom_translations[key]
        elif key in _TRANSLATIONS:
            translations = _TRANSLATIONS[key]
        else:
            return key  # Return key if not found

        # Get translation for language, fallback to English
        text = translations.get(lang_code, translations.get("en", key))

        # Apply template interpolation
        if kwargs:
            try:
                text = text.format(**kwargs)
            except (KeyError, IndexError, ValueError):
                # Missing name, positional placeholder or malformed template:
                # keep the untranslated template rather than failing.
                pass

        return text

    @classmethod
    def translate_issue(
        cls,
        issue: ValidationIssue,
        lang: Language | None = None,
    ) -> ValidationIssue:
        """Translate validation issue to specified language.

        Args:
            issue: Original validation issue
            lang: Target language

        Returns:
            New issue with translated messages; all other fields are copied
            from ``issue`` unchanged.
        """
        lang = lang or cls._current_language

        # Translate issue type
        translated_type = cls.t(issue.issue_type, lang)

        # When details are present and the issue type mentions a known
        # translation key, the details become the generic "N values found".
        translated_details = issue.details
        if issue.details:
            issue_type_lower = issue.issue_type.lower()
            if any(key in issue_type_lower for key in _TRANSLATIONS):
                translated_details = cls.t(
                    "values_found",
                    lang,
                    count=issue.count,
                )

        # Create new issue with translated content
        return ValidationIssue(
            column=issue.column,
            issue_type=translated_type,
            count=issue.count,
            severity=issue.severity,
            details=translated_details,
            expected=issue.expected,
            actual=issue.actual,
            sample_values=issue.sample_values,
            error_context=issue.error_context,
            validator_name=issue.validator_name,
            execution_time_ms=issue.execution_time_ms,
        )

    @classmethod
    def translate_severity(cls, severity: Severity, lang: Language | None = None) -> str:
        """Translate severity level to specified language."""
        return cls.t(severity.value.lower(), lang)


# Convenience function
def translate(key: str, **kwargs: Any) -> str:
    """Translate a key using current language."""
    return I18n.t(key, **kwargs)
1555
+
1556
+
1557
+ # =============================================================================
1558
+ # Integration: Enhanced Validator with Enterprise Features
1559
+ # =============================================================================
1560
+
1561
+
1562
class EnterpriseValidator(Validator):
    """Validator base class that switches on the enterprise integrations.

    Wired in automatically:
    - Audit logging
    - Metrics collection
    - Configuration validation
    - Polars compatibility checks
    - Internationalized messages

    Usage:
        class MyValidator(EnterpriseValidator):
            name = "my_validator"
            category = "custom"

            def validate(self, lf):
                # Your validation logic
                pass
    """

    # Feature switches; subclasses override these class attributes.
    enable_audit: bool = True
    enable_metrics: bool = True
    validate_config: bool = True
    translate_messages: bool = False

    def __init__(self, *args: Any, **kwargs: Any) -> None:
        """Initialize the base validator, then check its config if enabled.

        Raises:
            ValueError: From ``ConfigValidator.validate_or_raise`` when
                ``validate_config`` is on and the configuration is invalid.
        """
        super().__init__(*args, **kwargs)

        if not self.validate_config:
            return
        ConfigValidator.validate_or_raise(self.config, self.name)

    def validate_safe(self, lf: pl.LazyFrame) -> ValidatorExecutionResult:
        """Run the parent ``validate_safe`` wrapped in the enabled features.

        Order of operations: metrics tracking around the run, then audit
        logging of the result, then optional translation of the issues.
        """
        audit_logger = None
        if self.enable_audit:
            audit_logger = get_validation_audit_logger()

        metrics = None
        if self.enable_metrics:
            metrics = get_metrics_collector()

        if not metrics:
            result = super().validate_safe(lf)
        else:
            # The tracking context records what is stored in ``ctx`` on exit.
            with metrics.track_validation(self.name, self.category) as ctx:
                result = super().validate_safe(lf)
                ctx["status"] = result.status.value
                ctx["issues"] = result.issues

        if audit_logger:
            audit_logger.log_validation(validator=self, lf=lf, result=result)

        if self.translate_messages:
            # Replaces the issue list in place on the returned result.
            result.issues = list(map(I18n.translate_issue, result.issues))

        return result
1624
+
1625
+
1626
+ # =============================================================================
1627
+ # Exports
1628
+ # =============================================================================
1629
+
1630
# Public API of this module, grouped by the feature number used in the
# section banners above.
__all__ = [
    # #14 - audit logging
    "ValidationAuditRecord",
    "ValidationAuditLogger",
    "get_validation_audit_logger",
    # #15 - metrics collection
    "MetricsCollector",
    "get_metrics_collector",
    # #16 - reference data caching
    "CacheEntry",
    "ReferentialDataCache",
    "get_reference_cache",
    # #17 - parallel processing
    "ParallelExecutionMode",
    "ParallelExecutionConfig",
    "ParallelValidator",
    # #18 - configuration validation
    "ConfigValidationError",
    "ConfigValidator",
    # #19 - Polars version compatibility
    "PolarsVersionInfo",
    "PolarsCompat",
    # #20 - internationalization
    "Language",
    "I18n",
    "translate",
    # Integration class combining the features above
    "EnterpriseValidator",
]