truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877) hide show
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,817 @@
1
+ """Arrow-based zero-copy bridge between distributed backends and Polars.
2
+
3
+ This module provides efficient data transfer between distributed
4
+ computing frameworks (Spark, Dask, Ray) and Polars using Apache Arrow
5
+ as the intermediate format.
6
+
7
+ Key Features:
8
+ - Zero-copy conversion when possible (same memory buffer)
9
+ - Chunked transfer for memory efficiency
10
+ - Automatic type mapping
11
+ - Fallback paths for unsupported types
12
+
13
+ Architecture:
14
+ ┌─────────────────────────────────────────────────────────────────┐
15
+ │ ArrowBridge │
16
+ │ │
17
+ │ ┌───────────────┐ ┌───────────────┐ │
18
+ │ │ Spark DataFrame│ → │ Arrow Table │ → Polars │
19
+ │ │ (Native Arrow) │ │ (Zero-copy) │ LazyFrame │
20
+ │ └───────────────┘ └───────────────┘ │
21
+ │ │
22
+ │ ┌───────────────┐ ┌───────────────┐ │
23
+ │ │ Dask DataFrame │ → │ Arrow Batches │ → Polars │
24
+ │ │ (Pandas-based)│ │ (Chunked) │ LazyFrame │
25
+ │ └───────────────┘ └───────────────┘ │
26
+ │ │
27
+ │ ┌───────────────┐ ┌───────────────┐ │
28
+ │ │ Ray Dataset │ → │ Arrow Batches │ → Polars │
29
+ │ │ (Native Arrow) │ │ (Streaming) │ LazyFrame │
30
+ │ └───────────────┘ └───────────────┘ │
31
+ │ │
32
+ └─────────────────────────────────────────────────────────────────┘
33
+
34
+ Example:
35
+ >>> from truthound.execution.distributed.arrow_bridge import ArrowBridge
36
+ >>>
37
+ >>> # Convert Spark DataFrame to Polars
38
+ >>> bridge = ArrowBridge()
39
+ >>> polars_lf = bridge.spark_to_polars(spark_df)
40
+ >>>
41
+ >>> # Convert with chunking for large data
42
+ >>> for chunk_lf in bridge.spark_to_polars_chunked(spark_df, chunk_size=100_000):
43
+ ... process(chunk_lf)
44
+ """
45
+
46
+ from __future__ import annotations
47
+
48
+ import logging
49
+ from dataclasses import dataclass, field
50
+ from enum import Enum, auto
51
+ from typing import TYPE_CHECKING, Any, Iterator
52
+
53
+ if TYPE_CHECKING:
54
+ import polars as pl
55
+ import pyarrow as pa
56
+ from pyspark.sql import DataFrame as SparkDataFrame
57
+
58
+ logger = logging.getLogger(__name__)
59
+
60
+
61
+ # =============================================================================
62
+ # Configuration
63
+ # =============================================================================
64
+
65
+
66
class ArrowConversionStrategy(str, Enum):
    """Available routes for turning distributed data into Arrow.

    Members are also plain strings, so they compare equal to their
    lowercase values (e.g. ``ArrowConversionStrategy.NATIVE == "native"``).
    """

    # Fastest: rely on the backend's own Arrow support.
    NATIVE = "native"
    # Most compatible: go through a Pandas DataFrame.
    PANDAS = "pandas"
    # Slowest: build the columns row by row.
    MANUAL = "manual"
    # Let the bridge pick a strategy on its own.
    AUTO = "auto"
73
+
74
+
75
@dataclass
class ArrowBridgeConfig:
    """Options controlling how :class:`ArrowBridge` converts data.

    Attributes:
        strategy: Conversion strategy; ``AUTO`` lets the bridge decide.
        batch_size: Default number of rows per chunk for chunked conversion.
        max_memory_bytes: Upper bound on memory to use for conversion.
            NOTE(review): no consumer of this field is visible in this
            part of the module - confirm where it is enforced.
        coerce_temporal: Coerce temporal types to standard formats.
            NOTE(review): no consumer visible here - confirm.
        preserve_index: Forwarded to ``pa.Table.from_pandas`` to decide
            whether the Pandas index is kept.
        null_handling: Null policy, one of "mask", "sentinel" or "error".
            NOTE(review): the value is not validated here - confirm.
    """

    strategy: ArrowConversionStrategy = ArrowConversionStrategy.AUTO
    batch_size: int = 64 * 1024  # 65536 rows
    max_memory_bytes: int = 1024**3  # 1GB
    coerce_temporal: bool = True
    preserve_index: bool = False
    null_handling: str = "mask"
94
+
95
+
96
+ # =============================================================================
97
+ # Type Mapping
98
+ # =============================================================================
99
+
100
+
101
def _spark_type_to_arrow(spark_type: Any) -> "pa.DataType":
    """Translate a Spark SQL data type into the matching Arrow type.

    Primitive types map one-to-one, timestamps become microsecond
    timestamps, decimals keep their precision and scale, and arrays,
    maps and structs are translated recursively. Any other type is
    reported with a warning and mapped to an Arrow string.

    Args:
        spark_type: Spark data type instance.

    Returns:
        Corresponding Arrow data type.
    """
    import pyarrow as pa
    from pyspark.sql.types import (
        ArrayType,
        BinaryType,
        BooleanType,
        ByteType,
        DateType,
        DecimalType,
        DoubleType,
        FloatType,
        IntegerType,
        LongType,
        MapType,
        ShortType,
        StringType,
        StructType,
        TimestampType,
    )

    # Spark primitive class -> zero-argument Arrow type factory.
    primitive_factories = {
        BooleanType: pa.bool_,
        ByteType: pa.int8,
        ShortType: pa.int16,
        IntegerType: pa.int32,
        LongType: pa.int64,
        FloatType: pa.float32,
        DoubleType: pa.float64,
        StringType: pa.string,
        BinaryType: pa.binary,
        DateType: pa.date32,
    }

    # Exact class match only; subclasses are treated as unknown.
    kind = type(spark_type)

    factory = primitive_factories.get(kind)
    if factory is not None:
        return factory()

    if kind is TimestampType:
        return pa.timestamp("us")

    if kind is DecimalType:
        return pa.decimal128(spark_type.precision, spark_type.scale)

    if kind is ArrayType:
        return pa.list_(_spark_type_to_arrow(spark_type.elementType))

    if kind is MapType:
        keys = _spark_type_to_arrow(spark_type.keyType)
        values = _spark_type_to_arrow(spark_type.valueType)
        return pa.map_(keys, values)

    if kind is StructType:
        return pa.struct(
            [
                pa.field(
                    member.name,
                    _spark_type_to_arrow(member.dataType),
                    nullable=member.nullable,
                )
                for member in spark_type.fields
            ]
        )

    # Nothing matched: warn and fall back to string.
    logger.warning(f"Unknown Spark type {spark_type}, defaulting to string")
    return pa.string()
173
+
174
+
175
def _polars_type_to_arrow(polars_type: Any) -> "pa.DataType":
    """Convert Polars data type to Arrow data type.

    Accepts both dtype instances (``pl.Datetime("ns")``) and bare dtype
    classes (``pl.Int64``). ``Datetime`` and ``Duration`` instances keep
    their time unit, and ``Datetime`` also keeps its time zone, instead
    of being forced to naive microseconds. Parameterised types given as
    bare classes use the defaults ("us", no time zone); bare ``List`` /
    ``Struct`` classes carry no inner type and fall back to string, as
    does any unrecognised type.

    Args:
        polars_type: Polars data type (instance or class).

    Returns:
        Corresponding Arrow data type.
    """
    import pyarrow as pa
    import polars as pl

    # Polars dtype class -> zero-argument Arrow type factory.
    simple_types = {
        pl.Boolean: pa.bool_,
        pl.Int8: pa.int8,
        pl.Int16: pa.int16,
        pl.Int32: pa.int32,
        pl.Int64: pa.int64,
        pl.UInt8: pa.uint8,
        pl.UInt16: pa.uint16,
        pl.UInt32: pa.uint32,
        pl.UInt64: pa.uint64,
        pl.Float32: pa.float32,
        pl.Float64: pa.float64,
        pl.String: pa.string,
        pl.Utf8: pa.string,
        pl.Binary: pa.binary,
        pl.Date: pa.date32,
    }

    # A bare class such as pl.Int64 is its own dtype class; taking
    # type() of it would yield the metaclass and miss every mapping.
    is_instance = not isinstance(polars_type, type)
    polars_type_class = type(polars_type) if is_instance else polars_type

    if polars_type_class in simple_types:
        return simple_types[polars_type_class]()

    if polars_type_class is pl.Time:
        return pa.time64("us")

    if polars_type_class in (pl.Datetime, pl.Duration):
        # Only instances carry unit/zone; anything else keeps the
        # previous default of microseconds without a time zone.
        unit = getattr(polars_type, "time_unit", None) if is_instance else None
        if not isinstance(unit, str):
            unit = "us"
        if polars_type_class is pl.Duration:
            return pa.duration(unit)
        zone = getattr(polars_type, "time_zone", None) if is_instance else None
        if not isinstance(zone, str):
            zone = None
        return pa.timestamp(unit, tz=zone)

    if is_instance and polars_type_class is pl.List:
        return pa.list_(_polars_type_to_arrow(polars_type.inner))

    if is_instance and polars_type_class is pl.Struct:
        return pa.struct(
            [
                pa.field(member.name, _polars_type_to_arrow(member.dtype))
                for member in polars_type.fields
            ]
        )

    # Default to string
    return pa.string()
227
+
228
+
229
+ # =============================================================================
230
+ # Arrow Bridge
231
+ # =============================================================================
232
+
233
+
234
+ class ArrowBridge:
235
+ """Bridge for converting between distributed data and Polars via Arrow.
236
+
237
+ This class provides efficient conversion between Spark, Dask, Ray,
238
+ and Polars using Apache Arrow as the intermediate format.
239
+
240
+ Features:
241
+ - Zero-copy when possible (native Arrow support)
242
+ - Chunked conversion for memory efficiency
243
+ - Automatic type mapping
244
+ - Multiple fallback strategies
245
+
246
+ Example:
247
+ >>> bridge = ArrowBridge()
248
+ >>>
249
+ >>> # Convert Spark to Polars
250
+ >>> polars_lf = bridge.spark_to_polars(spark_df)
251
+ >>>
252
+ >>> # Convert Polars to Spark
253
+ >>> spark_df = bridge.polars_to_spark(polars_df, spark)
254
+ """
255
+
256
+ def __init__(self, config: ArrowBridgeConfig | None = None) -> None:
257
+ """Initialize Arrow bridge.
258
+
259
+ Args:
260
+ config: Optional configuration.
261
+ """
262
+ self._config = config or ArrowBridgeConfig()
263
+
264
+ @property
265
+ def config(self) -> ArrowBridgeConfig:
266
+ """Get bridge configuration."""
267
+ return self._config
268
+
269
+ # -------------------------------------------------------------------------
270
+ # Spark <-> Polars
271
+ # -------------------------------------------------------------------------
272
+
273
def spark_to_polars(
    self,
    spark_df: "SparkDataFrame",
    collect: bool = True,
) -> "pl.LazyFrame | pl.DataFrame":
    """Turn a Spark DataFrame into a Polars frame via Arrow.

    The conversion route comes from ``_determine_strategy``.

    Args:
        spark_df: Spark DataFrame.
        collect: If True, return DataFrame; if False, return LazyFrame.

    Returns:
        Polars DataFrame or LazyFrame.
    """
    import polars as pl

    chosen = self._determine_strategy(spark_df)

    # Any strategy other than NATIVE or PANDAS takes the manual route.
    converters = {
        ArrowConversionStrategy.NATIVE: self._spark_to_arrow_native,
        ArrowConversionStrategy.PANDAS: self._spark_to_arrow_pandas,
    }
    to_arrow = converters.get(chosen, self._spark_to_arrow_manual)

    frame = pl.from_arrow(to_arrow(spark_df))
    return frame if collect else frame.lazy()
303
+
304
def spark_to_polars_chunked(
    self,
    spark_df: "SparkDataFrame",
    chunk_size: int | None = None,
) -> Iterator["pl.DataFrame"]:
    """Yield a Spark DataFrame as a sequence of Polars DataFrames.

    Each Arrow record batch produced by ``_spark_to_arrow_batches`` is
    wrapped in a single-batch Arrow table and handed to Polars.

    Args:
        spark_df: Spark DataFrame.
        chunk_size: Rows per chunk; a falsy value selects
            ``config.batch_size``.

    Yields:
        Polars DataFrames for each chunk.
    """
    import polars as pl
    import pyarrow as pa

    rows_per_chunk = chunk_size or self._config.batch_size

    for record_batch in self._spark_to_arrow_batches(spark_df, rows_per_chunk):
        yield pl.from_arrow(pa.Table.from_batches([record_batch]))
332
+
333
def polars_to_spark(
    self,
    polars_df: "pl.DataFrame | pl.LazyFrame",
    spark: Any,
) -> "SparkDataFrame":
    """Convert Polars DataFrame to Spark.

    The frame is handed to Spark as a Pandas DataFrame. The preferred
    route builds that Pandas frame from the Arrow table; if Spark
    rejects it, the failure is logged and the frame produced by
    ``polars_df.to_pandas()`` is tried instead.

    Args:
        polars_df: Polars DataFrame or LazyFrame (collected first).
        spark: SparkSession.

    Returns:
        Spark DataFrame.

    Raises:
        Exception: Whatever the fallback ``createDataFrame`` call raises
            when both attempts fail.
    """
    import polars as pl

    if isinstance(polars_df, pl.LazyFrame):
        polars_df = polars_df.collect()

    # Convert to Arrow
    arrow_table = polars_df.to_arrow()

    try:
        # Arrow -> Pandas -> Spark.
        return spark.createDataFrame(arrow_table.to_pandas())
    except Exception:
        # Deliberate best-effort: keep the fallback, but leave a trace
        # instead of hiding why the first attempt failed.
        logger.warning(
            "Arrow-based Polars to Spark conversion failed; "
            "retrying via polars_df.to_pandas()",
            exc_info=True,
        )
        return spark.createDataFrame(polars_df.to_pandas())
362
+
363
def _spark_to_arrow_native(
    self,
    spark_df: "SparkDataFrame",
) -> "pa.Table":
    """Convert Spark to Arrow using native support.

    Uses the DataFrame's ``_collect_as_arrow()``. When that attribute
    is missing the Pandas path is used instead. When the collect yields
    no batches at all, an empty table is built from the Spark schema,
    because ``pa.Table.from_batches`` cannot infer a schema from an
    empty list and would raise ``ValueError``.

    Args:
        spark_df: Spark DataFrame.

    Returns:
        Arrow Table.
    """
    import pyarrow as pa

    try:
        batches = list(spark_df._collect_as_arrow())
    except AttributeError:
        # Fallback to Pandas path
        return self._spark_to_arrow_pandas(spark_df)

    if batches:
        return pa.Table.from_batches(batches)

    # No batches: derive the schema from Spark, using the same mapping
    # as the manual conversion path.
    empty_schema = pa.schema(
        [
            pa.field(
                spark_field.name,
                _spark_type_to_arrow(spark_field.dataType),
                nullable=spark_field.nullable,
            )
            for spark_field in spark_df.schema.fields
        ]
    )
    return empty_schema.empty_table()
384
+
385
def _spark_to_arrow_pandas(
    self,
    spark_df: "SparkDataFrame",
) -> "pa.Table":
    """Build an Arrow table from a Spark DataFrame by way of Pandas.

    Side effect: sets ``spark.sql.execution.arrow.pyspark.enabled`` to
    "true" on the DataFrame's Spark session before calling ``toPandas``.

    Args:
        spark_df: Spark DataFrame.

    Returns:
        Arrow Table.
    """
    import pyarrow as pa

    # Turn on Spark's Arrow optimization for the toPandas() call below.
    session_conf = spark_df.sparkSession.conf
    session_conf.set("spark.sql.execution.arrow.pyspark.enabled", "true")

    keep_index = self._config.preserve_index
    return pa.Table.from_pandas(spark_df.toPandas(), preserve_index=keep_index)
410
+
411
def _spark_to_arrow_manual(
    self,
    spark_df: "SparkDataFrame",
) -> "pa.Table":
    """Build an Arrow table from collected Spark rows.

    This is the slowest but most compatible path: the Arrow schema is
    derived from the Spark schema, every row is collected, and the
    values are regrouped by column.

    Args:
        spark_df: Spark DataFrame.

    Returns:
        Arrow Table.
    """
    import pyarrow as pa

    # Arrow schema mirrored from the Spark schema.
    schema = pa.schema(
        [
            pa.field(
                spark_field.name,
                _spark_type_to_arrow(spark_field.dataType),
                nullable=spark_field.nullable,
            )
            for spark_field in spark_df.schema.fields
        ]
    )

    # Pivot the collected rows into one value list per column.
    names = spark_df.columns
    column_values = {name: [] for name in names}
    for spark_row in spark_df.collect():
        record = spark_row.asDict()
        for name in names:
            column_values[name].append(record.get(name))

    return pa.Table.from_pydict(column_values, schema=schema)
444
+
445
+ def _spark_to_arrow_batches(
446
+ self,
447
+ spark_df: "SparkDataFrame",
448
+ batch_size: int,
449
+ ) -> Iterator["pa.RecordBatch"]:
450
+ """Convert Spark to Arrow batches.
451
+
452
+ Args:
453
+ spark_df: Spark DataFrame.
454
+ batch_size: Batch size.
455
+
456
+ Yields:
457
+ Arrow RecordBatches.
458
+ """
459
+ import pyarrow as pa
460
+
461
+ try:
462
+ # Try native Arrow batches
463
+ batches = spark_df._collect_as_arrow()
464
+ for batch in batches:
465
+ yield batch
466
+ except AttributeError:
467
+ # Fallback: collect partitions separately
468
+ for partition_rows in spark_df.rdd.mapPartitions(
469
+ lambda it: [list(it)]
470
+ ).collect():
471
+ if not partition_rows:
472
+ continue
473
+
474
+ columns = spark_df.columns
475
+ data = {col: [] for col in columns}
476
+
477
+ for row in partition_rows:
478
+ row_dict = row.asDict()
479
+ for col in columns:
480
+ data[col].append(row_dict.get(col))
481
+
482
+ # Create batch
483
+ batch = pa.RecordBatch.from_pydict(data)
484
+ yield batch
485
+
486
def _determine_strategy(
    self,
    spark_df: "SparkDataFrame",
) -> ArrowConversionStrategy:
    """Determine best conversion strategy.

    An explicitly configured strategy (anything but ``AUTO``) is
    returned unchanged. Otherwise ``NATIVE`` is chosen when the session
    reports Spark major version 3 or later, and ``PANDAS`` in every
    other case, including when the version cannot be read or parsed.

    Args:
        spark_df: Spark DataFrame.

    Returns:
        Best strategy to use.
    """
    configured = self._config.strategy
    if configured != ArrowConversionStrategy.AUTO:
        return configured

    try:
        # Spark 3.0+ has native Arrow support
        major_version = int(spark_df.sparkSession.version.split(".")[0])
    except Exception:
        # Best-effort probe: record the reason, then use the default.
        logger.debug(
            "Could not determine Spark version; using the Pandas strategy",
            exc_info=True,
        )
    else:
        if major_version >= 3:
            return ArrowConversionStrategy.NATIVE

    # The Pandas path enables Spark's Arrow setting itself, so the
    # current value of that setting does not affect the choice.
    return ArrowConversionStrategy.PANDAS
524
+
525
+ # -------------------------------------------------------------------------
526
+ # Dask <-> Polars
527
+ # -------------------------------------------------------------------------
528
+
529
def dask_to_polars(
    self,
    dask_df: Any,
    collect: bool = True,
) -> "pl.LazyFrame | pl.DataFrame":
    """Materialize a Dask DataFrame and hand it to Polars through Arrow.

    The whole collection is computed first, then wrapped in an Arrow table
    using the configured ``preserve_index`` setting.

    Args:
        dask_df: Dask DataFrame.
        collect: If True, return DataFrame; if False, return LazyFrame.

    Returns:
        Polars DataFrame or LazyFrame.
    """
    import polars as pl
    import pyarrow as pa

    materialized = dask_df.compute()
    frame = pl.from_arrow(
        pa.Table.from_pandas(
            materialized,
            preserve_index=self._config.preserve_index,
        )
    )

    return frame if collect else frame.lazy()
558
+
559
def dask_to_polars_chunked(
    self,
    dask_df: Any,
    chunk_size: int | None = None,
) -> Iterator["pl.DataFrame"]:
    """Convert a Dask DataFrame to Polars one partition at a time.

    Each partition is computed on its own, so only one partition is
    materialized per yielded frame.

    Args:
        dask_df: Dask DataFrame.
        chunk_size: Rows per chunk (ignored, uses partitions).

    Yields:
        Polars DataFrames for each partition.
    """
    import polars as pl
    import pyarrow as pa

    # Use the same index handling as ``dask_to_polars`` so the chunked and
    # unchunked paths produce the same set of columns.
    preserve_index = self._config.preserve_index

    for partition_index in range(dask_df.npartitions):
        partition_df = dask_df.get_partition(partition_index).compute()
        arrow_table = pa.Table.from_pandas(
            partition_df,
            preserve_index=preserve_index,
        )
        yield pl.from_arrow(arrow_table)
580
+
581
def polars_to_dask(
    self,
    polars_df: "pl.DataFrame | pl.LazyFrame",
    npartitions: int | None = None,
) -> Any:
    """Build a Dask DataFrame from Polars data via a Pandas frame.

    Args:
        polars_df: Polars DataFrame or LazyFrame; a LazyFrame is collected
            first.
        npartitions: Number of partitions. A falsy value selects 4.

    Returns:
        Dask DataFrame.
    """
    import dask.dataframe as dd
    import polars as pl

    eager = polars_df.collect() if isinstance(polars_df, pl.LazyFrame) else polars_df
    as_pandas = eager.to_pandas()

    partition_count = npartitions or 4
    return dd.from_pandas(as_pandas, npartitions=partition_count)
606
+
607
+ # -------------------------------------------------------------------------
608
+ # Ray <-> Polars
609
+ # -------------------------------------------------------------------------
610
+
611
def ray_to_polars(
    self,
    ray_dataset: Any,
    collect: bool = True,
) -> "pl.LazyFrame | pl.DataFrame":
    """Convert Ray Dataset to Polars.

    Args:
        ray_dataset: Ray Dataset.
        collect: If True, return DataFrame; if False, return LazyFrame.

    Returns:
        Polars DataFrame or LazyFrame.
    """
    import polars as pl

    if hasattr(ray_dataset, "to_arrow"):
        arrow_table = ray_dataset.to_arrow()
    else:
        # NOTE(review): ``to_arrow()`` does not appear to exist on every Ray
        # release; ``to_arrow_refs()`` returns object references to Arrow
        # tables that are fetched and concatenated here. Confirm against
        # the Ray versions this package supports.
        import pyarrow as pa
        import ray

        tables = ray.get(ray_dataset.to_arrow_refs())
        # ``concat_tables`` rejects an empty list, so an empty dataset maps
        # to an empty table instead.
        arrow_table = pa.concat_tables(tables) if tables else pa.table({})

    df = pl.from_arrow(arrow_table)

    return df if collect else df.lazy()
634
+
635
def ray_to_polars_chunked(
    self,
    ray_dataset: Any,
    chunk_size: int | None = None,
) -> Iterator["pl.DataFrame"]:
    """Stream a Ray Dataset as a sequence of Polars DataFrames.

    Args:
        ray_dataset: Ray Dataset.
        chunk_size: Rows per chunk; a falsy value falls back to the
            configured ``batch_size``.

    Yields:
        Polars DataFrames for each chunk.
    """
    import polars as pl
    import pyarrow as pa

    rows_per_batch = chunk_size or self._config.batch_size
    arrow_batches = ray_dataset.iter_batches(
        batch_size=rows_per_batch,
        batch_format="pyarrow",
    )

    for item in arrow_batches:
        # Anything that is not already a Table is treated as a RecordBatch
        # and wrapped in a single-batch Table.
        table = item if isinstance(item, pa.Table) else pa.Table.from_batches([item])
        yield pl.from_arrow(table)
664
+
665
def polars_to_ray(
    self,
    polars_df: "pl.DataFrame | pl.LazyFrame",
) -> Any:
    """Create a Ray Dataset from Polars data through an Arrow table.

    Args:
        polars_df: Polars DataFrame or LazyFrame; a LazyFrame is collected
            first.

    Returns:
        Ray Dataset.
    """
    import ray
    import polars as pl

    eager = polars_df.collect() if isinstance(polars_df, pl.LazyFrame) else polars_df
    return ray.data.from_arrow(eager.to_arrow())
685
+
686
+ # -------------------------------------------------------------------------
687
+ # Generic Methods
688
+ # -------------------------------------------------------------------------
689
+
690
def to_arrow(self, data: Any) -> "pa.Table":
    """Convert any supported data type to Arrow Table.

    Inputs are recognised in this order: Arrow Table, Arrow RecordBatch,
    Polars DataFrame/LazyFrame, Spark DataFrame (has ``sparkSession``),
    Dask collection (has ``compute`` and ``npartitions``), any object with
    ``to_arrow`` or ``to_arrow_refs`` (Ray Dataset), Pandas DataFrame.

    Args:
        data: Data to convert.

    Returns:
        Arrow Table.

    Raises:
        ValueError: If ``data`` matches none of the supported types.
    """
    import pyarrow as pa
    import polars as pl

    if isinstance(data, pa.Table):
        return data

    if isinstance(data, pa.RecordBatch):
        return pa.Table.from_batches([data])

    if isinstance(data, pl.LazyFrame):
        data = data.collect()
    if isinstance(data, pl.DataFrame):
        return data.to_arrow()

    # Check for Spark
    if hasattr(data, "sparkSession"):
        return self._spark_to_arrow_native(data)

    # Pandas-backed inputs use the configured index handling, matching
    # ``dask_to_polars``.
    preserve_index = self._config.preserve_index

    # Check for Dask
    if hasattr(data, "compute") and hasattr(data, "npartitions"):
        return pa.Table.from_pandas(
            data.compute(),
            preserve_index=preserve_index,
        )

    # Check for Ray Dataset
    if hasattr(data, "to_arrow"):
        return data.to_arrow()
    if hasattr(data, "to_arrow_refs"):
        # NOTE(review): fallback for Ray releases that only expose
        # ``to_arrow_refs()``; confirm against the supported Ray versions.
        import ray

        tables = ray.get(data.to_arrow_refs())
        # ``concat_tables`` rejects an empty list.
        return pa.concat_tables(tables) if tables else pa.table({})

    # Check for Pandas; only the import itself is allowed to fail quietly.
    try:
        import pandas as pd
    except ImportError:
        pd = None
    if pd is not None and isinstance(data, pd.DataFrame):
        return pa.Table.from_pandas(data, preserve_index=preserve_index)

    raise ValueError(f"Cannot convert {type(data)} to Arrow")
732
+
733
def to_polars(
    self,
    data: Any,
    collect: bool = True,
) -> "pl.LazyFrame | pl.DataFrame":
    """Turn any supported input into a Polars frame.

    Polars inputs are passed through (collected or made lazy as requested);
    everything else is first converted with ``to_arrow``.

    Args:
        data: Data to convert.
        collect: If True, return DataFrame; if False, return LazyFrame.

    Returns:
        Polars DataFrame or LazyFrame.
    """
    import polars as pl

    if isinstance(data, pl.LazyFrame):
        return data.collect() if collect else data

    if isinstance(data, pl.DataFrame):
        frame = data
    else:
        # Non-Polars input: go through Arrow.
        frame = pl.from_arrow(self.to_arrow(data))

    return frame if collect else frame.lazy()
762
+
763
+
764
+ # =============================================================================
765
+ # Convenience Functions
766
+ # =============================================================================
767
+
768
+
769
def spark_to_polars(
    spark_df: "SparkDataFrame",
    lazy: bool = True,
) -> "pl.LazyFrame | pl.DataFrame":
    """Convert a Spark DataFrame to Polars with a default ``ArrowBridge``.

    Args:
        spark_df: Spark DataFrame.
        lazy: If True, return LazyFrame; if False, return DataFrame.

    Returns:
        Polars DataFrame or LazyFrame.
    """
    # The bridge API is expressed as "collect", the inverse of "lazy".
    want_eager = not lazy
    return ArrowBridge().spark_to_polars(spark_df, collect=want_eager)
784
+
785
+
786
def polars_to_spark(
    polars_df: "pl.DataFrame | pl.LazyFrame",
    spark: Any,
) -> "SparkDataFrame":
    """Convert Polars data to a Spark DataFrame with a default ``ArrowBridge``.

    Args:
        polars_df: Polars DataFrame or LazyFrame.
        spark: SparkSession.

    Returns:
        Spark DataFrame.
    """
    return ArrowBridge().polars_to_spark(polars_df, spark)
801
+
802
+
803
def convert_to_polars(
    data: Any,
    lazy: bool = True,
) -> "pl.LazyFrame | pl.DataFrame":
    """Convert any supported input to Polars with a default ``ArrowBridge``.

    Args:
        data: Data to convert.
        lazy: If True, return LazyFrame; if False, return DataFrame.

    Returns:
        Polars DataFrame or LazyFrame.
    """
    # The bridge API is expressed as "collect", the inverse of "lazy".
    want_eager = not lazy
    return ArrowBridge().to_polars(data, collect=want_eager)