truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877)
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,756 @@
1
+ """Protocol definitions for distributed execution engines.
2
+
3
+ This module defines the structural typing protocols that all distributed
4
+ execution engine implementations should follow. These protocols enable:
5
+ - Type-safe distributed operations
6
+ - Backend-agnostic interfaces
7
+ - Extensibility for custom backends
8
+
9
+ Design Principles:
10
+ 1. Protocol-first: Define interfaces before implementations
11
+ 2. Composable: Small, focused protocols that can be combined
12
+ 3. Backend-agnostic: Same interface for Spark, Dask, Ray, etc.
13
+ """
14
+
15
+ from __future__ import annotations
16
+
17
+ from abc import ABC, abstractmethod
18
+ from dataclasses import dataclass, field
19
+ from enum import Enum, auto
20
+ from typing import TYPE_CHECKING, Any, Callable, Generic, Iterator, Protocol, TypeVar, runtime_checkable
21
+
22
+ if TYPE_CHECKING:
23
+ import polars as pl
24
+ import pyarrow as pa
25
+
26
+
27
+ # =============================================================================
28
+ # Enums
29
+ # =============================================================================
30
+
31
+
32
class ExecutionMode(str, Enum):
    """How a distributed operation is scheduled.

    Members subclass ``str``, so each one compares equal to its plain
    string value.
    """

    # Run the operation immediately.
    EAGER = "eager"
    # Build an execution plan first; run it when results are collected.
    LAZY = "lazy"
    # Process the data in a streaming fashion.
    STREAMING = "streaming"
38
+
39
+
40
class PartitionStrategy(str, Enum):
    """Ways in which data can be split into partitions.

    Members subclass ``str``, so each one compares equal to its plain
    string value.
    """

    # Rows are assigned to partitions by hash.
    ROW_HASH = "row_hash"
    # Rows are assigned to partitions by range.
    ROW_RANGE = "row_range"
    # Data is partitioned by columns.
    COLUMN = "column"
    # Data is dealt out round-robin.
    ROUND_ROBIN = "round_robin"
    # A custom partitioning function decides.
    CUSTOM = "custom"
48
+
49
+
50
class AggregationScope(str, Enum):
    """Level at which an aggregation is computed.

    Members subclass ``str``, so each one compares equal to its plain
    string value.
    """

    # Aggregate across all partitions.
    GLOBAL = "global"
    # Aggregate within a single partition.
    PARTITION = "partition"
    # Aggregate per column.
    COLUMN = "column"
    # Aggregate by group key.
    GROUPED = "grouped"
57
+
58
+
59
class ComputeBackend(str, Enum):
    """Identifiers of the distributed compute backends that are supported.

    Members subclass ``str``, so each one compares equal to its plain
    string value.
    """

    SPARK = "spark"
    DASK = "dask"
    RAY = "ray"
    LOCAL = "local"
    AUTO = "auto"
67
+
68
+
69
+ # =============================================================================
70
+ # Data Classes
71
+ # =============================================================================
72
+
73
+
74
@dataclass(frozen=True)
class PartitionInfo:
    """Describe a single partition of a distributed dataset.

    Instances are frozen (fields cannot be reassigned) and hashable.
    ``metadata`` still takes part in equality comparisons but is left out
    of the generated hash: a ``dict`` is unhashable, so including it would
    make ``hash()`` raise ``TypeError`` for every instance. Note that the
    dict itself remains mutable; only rebinding the field is blocked.

    Attributes:
        partition_id: Unique identifier for this partition.
        total_partitions: Total number of partitions.
        row_start: Starting row index (inclusive).
        row_end: Ending row index (exclusive).
        columns: Columns in this partition.
        size_bytes: Estimated size in bytes.
        host: Host where this partition resides.
        metadata: Additional partition metadata (excluded from the hash).
    """

    partition_id: int
    total_partitions: int
    row_start: int = 0
    row_end: int = 0
    columns: tuple[str, ...] = ()
    size_bytes: int = 0
    host: str = ""
    # hash=False keeps the frozen dataclass usable as a dict key / set member.
    metadata: dict[str, Any] = field(default_factory=dict, hash=False)

    @property
    def row_count(self) -> int:
        """Return ``row_end - row_start``, the width of the half-open row range."""
        return self.row_end - self.row_start
102
+
103
+
104
@dataclass
class DistributedResult:
    """Outcome of one operation, tagged with the partition it came from.

    Attributes:
        partition_id: Source partition ID.
        operation: Operation that produced this result.
        value: The computed value.
        row_count: Number of rows processed.
        duration_ms: Processing duration in milliseconds.
        errors: List of errors encountered.
        warnings: List of warnings.
        metadata: Additional result metadata.
    """

    partition_id: int
    operation: str
    value: Any
    row_count: int = 0
    duration_ms: float = 0.0
    # Mutable defaults are built per instance through default_factory.
    errors: list[str] = field(default_factory=list)
    warnings: list[str] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def success(self) -> bool:
        """Return ``True`` when ``errors`` is empty; warnings are not considered."""
        error_count = len(self.errors)
        return error_count == 0
132
+
133
+
134
@dataclass
class DistributedAggregation:
    """Describe one aggregation to run on one column.

    Attributes:
        column: Column to aggregate.
        operation: Aggregation operation name, e.g. "count", "sum", "mean",
            "min", "max", "std", "var".
        params: Additional parameters for the operation.
        alias: Result column alias; defaults to ``"<column>_<operation>"``.
    """

    column: str
    operation: str
    params: dict[str, Any] = field(default_factory=dict)
    alias: str = ""

    def __post_init__(self) -> None:
        """Derive the alias from column and operation when it was left empty."""
        if self.alias:
            return
        self.alias = f"{self.column}_{self.operation}"
153
+
154
+
155
@dataclass
class AggregationSpec:
    """A batch of aggregations plus optional grouping and scope.

    Attributes:
        aggregations: The individual aggregations, in the order they were added.
        group_by: Column names to group by.
        scope: Aggregation scope; ``AggregationScope.GLOBAL`` by default.
    """

    aggregations: list[DistributedAggregation] = field(default_factory=list)
    group_by: list[str] = field(default_factory=list)
    scope: AggregationScope = AggregationScope.GLOBAL

    def add(
        self,
        column: str,
        operation: str,
        alias: str = "",
        **params: Any,
    ) -> "AggregationSpec":
        """Append one aggregation and return this spec so calls can be chained.

        Args:
            column: Column to aggregate.
            operation: Aggregation operation name.
            alias: Result alias; an empty string lets the aggregation pick
                its own default.
            **params: Extra keyword arguments, stored as the aggregation's
                ``params`` dict.

        Returns:
            This same ``AggregationSpec`` instance.
        """
        entry = DistributedAggregation(
            column=column,
            operation=operation,
            params=params,
            alias=alias,
        )
        self.aggregations.append(entry)
        return self
180
+
181
+
182
+ # =============================================================================
183
+ # Protocols
184
+ # =============================================================================
185
+
186
+
187
# Generic placeholder for an input / accumulated-state type.
T = TypeVar("T")

# Generic placeholder for a result type.
ResultT = TypeVar("ResultT")
189
+
190
+
191
@runtime_checkable
class DistributedDataProtocol(Protocol):
    """Structural interface for a distributed dataset.

    Meant to abstract over Spark DataFrames, Dask DataFrames, Ray Datasets
    and similar. Because the protocol is ``runtime_checkable``,
    ``isinstance`` checks only that the members exist, not their signatures.
    """

    @property
    def columns(self) -> list[str]:
        """Column names of the dataset."""
        ...

    @property
    def num_partitions(self) -> int:
        """Number of partitions the dataset has."""
        ...

    def repartition(self, num_partitions: int) -> "DistributedDataProtocol":
        """Repartition the data.

        Args:
            num_partitions: Requested number of partitions.
        """
        ...

    def get_partition_info(self) -> list[PartitionInfo]:
        """Return information about all partitions."""
        ...
216
+
217
+
218
@runtime_checkable
class MapReduceProtocol(Protocol[T, ResultT]):
    """Structural interface for map-reduce style processing.

    Type Parameters:
        T: Element type consumed by the map function.
        ResultT: Element type produced by the map function and by reduce.
    """

    def map_partitions(
        self,
        func: Callable[[Iterator[T]], Iterator[ResultT]],
    ) -> "MapReduceProtocol[ResultT, ResultT]":
        """Apply ``func`` to each partition.

        Args:
            func: Callable taking an iterator over a partition's elements
                and returning an iterator of results.
        """
        ...

    def reduce(
        self,
        func: Callable[[ResultT, ResultT], ResultT],
    ) -> ResultT:
        """Reduce all partitions to a single value.

        Args:
            func: Binary callable combining two results into one.
        """
        ...

    def collect(self) -> list[ResultT]:
        """Collect all results to the driver as a list."""
        ...
244
+
245
+
246
@runtime_checkable
class DistributedAggregatorProtocol(Protocol):
    """Structural interface for a distributed aggregator.

    An aggregator is expected to support:
    - partial aggregation (per partition),
    - final aggregation (merging across partitions),
    - incremental updates.
    """

    def initialize(self) -> Any:
        """Create the initial accumulator state."""
        ...

    def accumulate(self, state: Any, value: Any) -> Any:
        """Add ``value`` to the accumulator ``state``."""
        ...

    def merge(self, state1: Any, state2: Any) -> Any:
        """Merge two accumulator states."""
        ...

    def finalize(self, state: Any) -> Any:
        """Turn an accumulator state into the final result."""
        ...
271
+
272
+
273
@runtime_checkable
class DistributedBackendProtocol(Protocol):
    """Structural interface for a distributed computing backend.

    Every backend (Spark, Dask, Ray) has to provide these members in order
    to be usable with Truthound's distributed execution framework.
    """

    @property
    def backend_type(self) -> ComputeBackend:
        """Which backend this is."""
        ...

    @property
    def is_available(self) -> bool:
        """Whether the backend is available."""
        ...

    def initialize(self) -> None:
        """Initialize the backend (connect, start cluster, etc.)."""
        ...

    def shutdown(self) -> None:
        """Shut the backend down (disconnect, stop cluster, etc.)."""
        ...

    def distribute_data(
        self,
        data: Any,
        num_partitions: int | None = None,
        strategy: PartitionStrategy = PartitionStrategy.ROW_HASH,
    ) -> DistributedDataProtocol:
        """Distribute data across the cluster.

        Args:
            data: The data to distribute.
            num_partitions: Requested number of partitions, or ``None``.
            strategy: Partitioning strategy; hash-based row partitioning
                by default.
        """
        ...

    def map_partitions(
        self,
        data: DistributedDataProtocol,
        func: Callable[[Any], DistributedResult],
    ) -> list[DistributedResult]:
        """Execute ``func`` on each partition of ``data``.

        Args:
            data: The distributed data to process.
            func: Callable producing a ``DistributedResult``.
        """
        ...

    def aggregate(
        self,
        data: DistributedDataProtocol,
        spec: AggregationSpec,
    ) -> dict[str, Any]:
        """Perform the distributed aggregation described by ``spec``."""
        ...

    def collect(
        self,
        data: DistributedDataProtocol,
        limit: int | None = None,
    ) -> Any:
        """Collect distributed data to local.

        Args:
            data: The distributed data to collect.
            limit: Optional limit, or ``None``.
        """
        ...
332
+
333
+
334
@runtime_checkable
class ArrowConvertibleProtocol(Protocol):
    """Structural interface for types convertible to and from Arrow.

    Arrow serves as the zero-copy interchange format between the different
    compute backends.
    """

    def to_arrow(self) -> "pa.Table":
        """Return this object as a PyArrow Table."""
        ...

    @classmethod
    def from_arrow(cls, table: "pa.Table") -> Any:
        """Build an object from a PyArrow Table.

        Args:
            table: The PyArrow Table to convert from.
        """
        ...
350
+
351
+
352
@runtime_checkable
class DistributedExecutionProtocol(Protocol):
    """Structural interface for a distributed execution engine.

    An execution engine is the high-level entry point for running
    validations in a distributed manner.
    """

    @property
    def backend(self) -> DistributedBackendProtocol:
        """The underlying backend."""
        ...

    def count_rows(self) -> int:
        """Return the total row count (distributed)."""
        ...

    def count_nulls(self, column: str) -> int:
        """Return the number of nulls in ``column`` (distributed)."""
        ...

    def count_nulls_all(self) -> dict[str, int]:
        """Return null counts for all columns (distributed)."""
        ...

    def count_distinct(self, column: str) -> int:
        """Return the number of distinct values in ``column`` (distributed)."""
        ...

    def get_stats(self, column: str) -> dict[str, Any]:
        """Return statistics for ``column`` (distributed)."""
        ...

    def aggregate(self, spec: AggregationSpec) -> dict[str, Any]:
        """Perform the distributed aggregation described by ``spec``."""
        ...

    def to_polars_lazyframe(self) -> "pl.LazyFrame":
        """Convert to a Polars LazyFrame (via Arrow)."""
        ...
392
+
393
+
394
+ # =============================================================================
395
+ # Abstract Base Classes
396
+ # =============================================================================
397
+
398
+
399
class BaseAggregator(ABC, Generic[T]):
    """Abstract base for distributed aggregators.

    An aggregator follows the map-reduce pattern for computing aggregate
    statistics across partitions: create a state, feed values into it,
    merge states, then finalize. All four methods are abstract, so a
    subclass must override each of them to be instantiable.

    Type Parameters:
        T: Type of the accumulated state.
    """

    # Aggregator name; the subclasses in this module override it.
    name: str = "base"

    @abstractmethod
    def initialize(self) -> T:
        """Create the initial accumulator state."""

    @abstractmethod
    def accumulate(self, state: T, value: Any) -> T:
        """Add ``value`` to the accumulator ``state`` and return the state."""

    @abstractmethod
    def merge(self, state1: T, state2: T) -> T:
        """Merge two accumulator states into one."""

    @abstractmethod
    def finalize(self, state: T) -> Any:
        """Convert an accumulator state into the final result."""
430
+
431
+
432
+ # =============================================================================
433
+ # Built-in Aggregators
434
+ # =============================================================================
435
+
436
+
437
@dataclass
class CountState:
    """Accumulator state for the count aggregator."""

    # Running tally; a fresh state starts at zero.
    count: int = 0
442
+
443
+
444
class CountAggregator(BaseAggregator[CountState]):
    """Distributed aggregator that counts accumulated values."""

    name = "count"

    def initialize(self) -> CountState:
        """Return a fresh state with a count of zero."""
        return CountState(count=0)

    def accumulate(self, state: CountState, value: Any) -> CountState:
        """Increment ``state`` in place by one and return it.

        ``value`` is not inspected, so ``None`` is counted as well.
        """
        state.count = state.count + 1
        return state

    def merge(self, state1: CountState, state2: CountState) -> CountState:
        """Return a new state holding the sum of both counts."""
        combined = state1.count + state2.count
        return CountState(count=combined)

    def finalize(self, state: CountState) -> int:
        """Return the accumulated count."""
        return state.count
461
+
462
+
463
@dataclass
class SumState:
    """Accumulator state for the sum aggregator."""

    # Running total; a fresh state starts at 0.0.
    total: float = 0.0
468
+
469
+
470
class SumAggregator(BaseAggregator[SumState]):
    """Distributed aggregator that sums accumulated values as floats."""

    name = "sum"

    def initialize(self) -> SumState:
        """Return a fresh state with a total of 0.0."""
        return SumState()

    def accumulate(self, state: SumState, value: Any) -> SumState:
        """Add ``float(value)`` to ``state`` in place and return it.

        ``None`` values are skipped and leave the state untouched.
        """
        if value is None:
            return state
        state.total += float(value)
        return state

    def merge(self, state1: SumState, state2: SumState) -> SumState:
        """Return a new state holding the sum of both totals."""
        combined = state1.total + state2.total
        return SumState(total=combined)

    def finalize(self, state: SumState) -> float:
        """Return the accumulated total."""
        return state.total
488
+
489
+
490
@dataclass
class MeanState:
    """Accumulator state for the mean aggregator (Welford's online algorithm)."""

    # Number of values folded in so far.
    count: int = 0
    # Running mean of those values.
    mean: float = 0.0
    # Running sum of squared differences from the mean.
    m2: float = 0.0
497
+
498
+
499
class MeanAggregator(BaseAggregator[MeanState]):
    """Distributed mean aggregator using parallel Welford's algorithm."""

    name = "mean"

    def initialize(self) -> MeanState:
        """Return an empty state (count 0, mean 0.0, m2 0.0)."""
        return MeanState()

    def accumulate(self, state: MeanState, value: Any) -> MeanState:
        """Fold ``value`` into ``state`` in place and return the state.

        ``None`` values are skipped. Any other value is converted with
        ``float()`` and applied with Welford's single-pass update.
        """
        if value is None:
            return state
        x = float(value)
        state.count += 1
        delta = x - state.mean
        state.mean += delta / state.count
        # The second factor uses the *updated* mean; that is what keeps the
        # running sum of squares exact in Welford's update.
        state.m2 += delta * (x - state.mean)
        return state

    def merge(self, state1: MeanState, state2: MeanState) -> MeanState:
        """Merge two states using the parallel form of Welford's algorithm.

        The result is always a new ``MeanState``; neither input is mutated
        or returned. ``accumulate`` mutates its state in place, so handing
        back an input object would let later accumulation into the merged
        result silently change a per-partition state.
        """
        # An empty side contributes nothing; copy the other side. This also
        # avoids dividing by zero when both sides are empty.
        if state1.count == 0:
            return MeanState(count=state2.count, mean=state2.mean, m2=state2.m2)
        if state2.count == 0:
            return MeanState(count=state1.count, mean=state1.mean, m2=state1.m2)

        count = state1.count + state2.count
        delta = state2.mean - state1.mean
        mean = state1.mean + delta * state2.count / count
        m2 = (
            state1.m2
            + state2.m2
            + delta**2 * state1.count * state2.count / count
        )
        return MeanState(count=count, mean=mean, m2=m2)

    def finalize(self, state: MeanState) -> float:
        """Return the mean, or 0.0 when no values were accumulated."""
        return state.mean if state.count > 0 else 0.0
537
+
538
+
539
@dataclass
class StdState(MeanState):
    """State for the standard deviation aggregator.

    Adds no fields of its own; it reuses ``count``, ``mean`` and ``m2`` from
    ``MeanState``.
    """
544
+
545
+
546
class StdAggregator(BaseAggregator[StdState]):
    """Standard deviation aggregator, mergeable across partial states.

    Maintains count, mean and the sum of squared differences (``m2``) with
    Welford's update, and derives the standard deviation at finalize time.
    """

    name = "std"

    def __init__(self, ddof: int = 1) -> None:
        """Store the delta degrees of freedom.

        Args:
            ddof: Value subtracted from the count in the variance divisor.
        """
        self.ddof = ddof

    def initialize(self) -> StdState:
        """Return an empty state."""
        return StdState()

    def accumulate(self, state: StdState, value: Any) -> StdState:
        """Fold one value into ``state`` in place; ``None`` is skipped.

        Returns:
            The (mutated) ``state`` object.
        """
        if value is not None:
            observation = float(value)
            state.count += 1
            gap_old = observation - state.mean
            state.mean += gap_old / state.count
            gap_new = observation - state.mean
            state.m2 += gap_old * gap_new
        return state

    def merge(self, state1: StdState, state2: StdState) -> StdState:
        """Combine two partial states.

        An empty side short-circuits to the other state object unchanged;
        otherwise a new ``StdState`` is returned.
        """
        if state1.count == 0:
            return state2
        if state2.count == 0:
            return state1

        combined_n = state1.count + state2.count
        shift = state2.mean - state1.mean
        combined_mean = state1.mean + shift * state2.count / combined_n
        correction = shift**2 * state1.count * state2.count / combined_n
        combined_m2 = state1.m2 + state2.m2 + correction
        return StdState(count=combined_n, mean=combined_mean, m2=combined_m2)

    def finalize(self, state: StdState) -> float:
        """Return the standard deviation.

        Returns:
            ``sqrt(m2 / (count - ddof))``, or ``0.0`` when ``count`` does not
            exceed ``ddof``.
        """
        if state.count <= self.ddof:
            return 0.0
        divisor = state.count - self.ddof
        return (state.m2 / divisor) ** 0.5
589
+
590
+
591
@dataclass
class MinMaxState:
    """Bounds tracked by the min/max aggregator.

    Attributes:
        min_value: Smallest value recorded, or ``None`` if none yet.
        max_value: Largest value recorded, or ``None`` if none yet.
    """

    min_value: float | None = None
    max_value: float | None = None
597
+
598
+
599
class MinMaxAggregator(BaseAggregator[MinMaxState]):
    """Distributed min/max aggregator.

    Tracks the smallest and largest value seen, after coercing each
    non-``None`` input with ``float()``. A bound of ``None`` means that no
    value has been recorded for it yet.
    """

    name = "minmax"

    def initialize(self) -> MinMaxState:
        """Return an empty state with both bounds unset (``None``)."""
        return MinMaxState()

    def accumulate(self, state: MinMaxState, value: Any) -> MinMaxState:
        """Widen the bounds of ``state`` in place so they include ``value``.

        Args:
            state: State to update.
            value: Value to include; ``None`` is skipped, anything else is
                passed through ``float()``.

        Returns:
            The (mutated) ``state`` object.
        """
        if value is None:
            return state
        x = float(value)
        if state.min_value is None or x < state.min_value:
            state.min_value = x
        if state.max_value is None or x > state.max_value:
            state.max_value = x
        return state

    def merge(self, state1: MinMaxState, state2: MinMaxState) -> MinMaxState:
        """Combine the bounds of two partial states into a new state.

        A bound that is ``None`` on one side is taken from the other side; it
        is ``None`` in the result only when it is ``None`` on both sides.

        Args:
            state1: First partial state.
            state2: Second partial state.

        Returns:
            A new ``MinMaxState`` covering both inputs.
        """
        # Filter on `is not None` rather than truthiness: 0.0 is a legitimate
        # bound but is falsy, so `a or b` would silently discard it.
        lows = [v for v in (state1.min_value, state2.min_value) if v is not None]
        highs = [v for v in (state1.max_value, state2.max_value) if v is not None]
        return MinMaxState(
            min_value=min(lows, default=None),
            max_value=max(highs, default=None),
        )

    def finalize(self, state: MinMaxState) -> dict[str, float | None]:
        """Return the bounds as ``{"min": ..., "max": ...}``."""
        return {"min": state.min_value, "max": state.max_value}
635
+
636
+
637
@dataclass
class NullCountState:
    """Counters carried by the null count aggregator.

    Attributes:
        null_count: Number of ``None`` values seen.
        total_count: Number of values seen overall.
    """

    null_count: int = 0
    total_count: int = 0
643
+
644
+
645
class NullCountAggregator(BaseAggregator[NullCountState]):
    """Aggregator counting ``None`` values alongside the total seen."""

    name = "null_count"

    def initialize(self) -> NullCountState:
        """Return a state with both counters at zero."""
        return NullCountState()

    def accumulate(self, state: NullCountState, value: Any) -> NullCountState:
        """Record one value in ``state`` in place.

        The total is incremented for every call; the null counter only when
        ``value`` is ``None``.

        Returns:
            The (mutated) ``state`` object.
        """
        if value is None:
            state.null_count += 1
        state.total_count += 1
        return state

    def merge(self, state1: NullCountState, state2: NullCountState) -> NullCountState:
        """Return a new state whose counters are the sums of both inputs."""
        nulls = state1.null_count + state2.null_count
        total = state1.total_count + state2.total_count
        return NullCountState(null_count=nulls, total_count=total)

    def finalize(self, state: NullCountState) -> dict[str, int]:
        """Return both counters as a dictionary."""
        nulls, total = state.null_count, state.total_count
        return {"null_count": nulls, "total_count": total}
667
+
668
+
669
@dataclass
class DistinctState:
    """State for the distinct count aggregator.

    This is the simplified, set-based version; for production-scale
    cardinalities a HyperLogLog sketch is the suggested replacement.

    Attributes:
        seen: Set of recorded entries (a new empty set per instance).
        count: Running counter kept alongside ``seen``.
    """

    seen: set = field(default_factory=set)
    count: int = 0
676
+
677
+
678
class DistinctCountAggregator(BaseAggregator[DistinctState]):
    """Distinct count aggregator, mergeable across partial states.

    Keeps the built-in ``hash()`` of each value in a set and reports the set
    size. Recording stops once the set holds ``max_sample`` entries.

    Note: For very large cardinalities, consider using HyperLogLog.
    """

    name = "distinct_count"

    def __init__(self, max_sample: int = 100_000) -> None:
        """Store the cap on how many hashes a single state may collect.

        Args:
            max_sample: Set size at which ``accumulate`` stops recording.
        """
        self.max_sample = max_sample

    def initialize(self) -> DistinctState:
        """Return an empty state."""
        return DistinctState()

    def accumulate(self, state: DistinctState, value: Any) -> DistinctState:
        """Record ``value`` in ``state`` in place.

        ``None`` values, and any value arriving after the set has reached
        ``max_sample`` entries, are not added to the set.

        Returns:
            The (mutated) ``state`` object.
        """
        # NOTE(review): hash() of str/bytes is salted per interpreter process
        # unless PYTHONHASHSEED is fixed - confirm that states merged together
        # were produced under the same hash seed.
        has_room = len(state.seen) < self.max_sample
        if value is not None and has_room:
            # Store the hash rather than the value for memory efficiency;
            # unhashable values fall back to the hash of their str() form.
            try:
                fingerprint = hash(value)
            except TypeError:
                fingerprint = hash(str(value))
            state.seen.add(fingerprint)
        # NOTE(review): assumed to run for every call (total values seen);
        # confirm against the upstream source.
        state.count += 1
        return state

    def merge(self, state1: DistinctState, state2: DistinctState) -> DistinctState:
        """Return a new state with the union of hashes and summed counters."""
        return DistinctState(
            seen=state1.seen | state2.seen,
            count=state1.count + state2.count,
        )

    def finalize(self, state: DistinctState) -> int:
        """Return the number of distinct hashes collected."""
        return len(state.seen)
710
+
711
+
712
+ # =============================================================================
713
+ # Aggregator Registry
714
+ # =============================================================================
715
+
716
+
717
# Maps each aggregator name to its class. ``get_aggregator`` looks names up
# here and ``register_aggregator`` adds or replaces entries.
AGGREGATOR_REGISTRY: dict[str, type[BaseAggregator]] = {
    "count": CountAggregator,
    "sum": SumAggregator,
    "mean": MeanAggregator,
    "std": StdAggregator,
    "minmax": MinMaxAggregator,
    "null_count": NullCountAggregator,
    "distinct_count": DistinctCountAggregator,
}
726
+
727
+
728
def get_aggregator(name: str, **kwargs: Any) -> BaseAggregator:
    """Instantiate the aggregator registered under ``name``.

    Args:
        name: Key to look up in ``AGGREGATOR_REGISTRY``.
        **kwargs: Keyword arguments forwarded to the aggregator constructor.

    Returns:
        A new instance of the registered aggregator class.

    Raises:
        KeyError: If ``name`` is not registered; the message lists the
            available names.
    """
    if name in AGGREGATOR_REGISTRY:
        aggregator_cls = AGGREGATOR_REGISTRY[name]
        return aggregator_cls(**kwargs)

    available = list(AGGREGATOR_REGISTRY.keys())
    raise KeyError(
        f"Unknown aggregator: {name}. "
        f"Available: {available}"
    )
747
+
748
+
749
def register_aggregator(name: str, aggregator_class: type[BaseAggregator]) -> None:
    """Add a custom aggregator class to ``AGGREGATOR_REGISTRY``.

    An existing entry under the same name is replaced.

    Args:
        name: Key the class is stored under.
        aggregator_class: Aggregator class to store.
    """
    AGGREGATOR_REGISTRY[name] = aggregator_class