truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877)
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,1734 @@
1
+ """Standardized Progress Callback System.
2
+
3
+ This module provides a comprehensive, extensible progress callback system
4
+ with Protocol-based design, adapters for various output targets, and
5
+ advanced features like filtering, throttling, and aggregation.
6
+
7
+ Key features:
8
+ - Protocol-based callback abstraction
9
+ - Registry for callback type discovery
10
+ - Multiple output adapters (console, file, logging, webhook)
11
+ - Progress filtering and throttling
12
+ - Hierarchical progress aggregation
13
+ - Event batching and buffering
14
+ - Async callback support
15
+
16
+ Example:
17
+ from truthound.profiler.progress_callbacks import (
18
+ CallbackRegistry,
19
+ ConsoleAdapter,
20
+ LoggingAdapter,
21
+ create_callback_chain,
22
+ )
23
+
24
+ # Create callbacks
25
+ console = ConsoleAdapter()
26
+ logger = LoggingAdapter(logger_name="profiler")
27
+
28
+ # Chain callbacks
29
+ chain = create_callback_chain(console, logger)
30
+
31
+ # Use with profiler
32
+ profiler.profile(data, progress_callback=chain)
33
+ """
34
+
35
+ from __future__ import annotations
36
+
37
+ import asyncio
38
+ import json
39
+ import logging
40
+ import queue
41
+ import threading
42
+ import time
43
+ from abc import ABC, abstractmethod
44
+ from collections import deque
45
+ from dataclasses import dataclass, field
46
+ from datetime import datetime, timedelta
47
+ from enum import Enum, auto
48
+ from pathlib import Path
49
+ from typing import (
50
+ Any,
51
+ Callable,
52
+ Generic,
53
+ Iterator,
54
+ Protocol,
55
+ Sequence,
56
+ TypeVar,
57
+ runtime_checkable,
58
+ )
59
+
60
+
61
+ # =============================================================================
62
+ # Event Types and Levels
63
+ # =============================================================================
64
+
65
+
66
class EventLevel(Enum):
    """Severity of a progress event, listed from least to most severe.

    Member values grow with severity (DEBUG is 1, CRITICAL is 6), so two
    levels can be ordered by comparing their ``value`` attributes.
    """

    DEBUG = 1  # Detailed debugging info
    INFO = 2  # Normal progress updates
    NOTICE = 3  # Notable milestones
    WARNING = 4  # Non-critical issues
    ERROR = 5  # Errors that don't stop processing
    CRITICAL = 6  # Fatal errors
75
+
76
+
77
class EventType(str, Enum):
    """Kinds of events that can flow through the callback system.

    Every member is also a ``str`` whose value is the lowercase name used
    when the event is serialized.
    """

    # --- Lifecycle of the whole operation ---
    START = "start"
    COMPLETE = "complete"
    FAIL = "fail"
    CANCEL = "cancel"
    PAUSE = "pause"
    RESUME = "resume"

    # --- Incremental progress (overall, per column, per analyzer) ---
    PROGRESS = "progress"
    COLUMN_START = "column_start"
    COLUMN_COMPLETE = "column_complete"
    COLUMN_PROGRESS = "column_progress"
    ANALYZER_START = "analyzer_start"
    ANALYZER_COMPLETE = "analyzer_complete"

    # --- Batch / partition boundaries ---
    BATCH_START = "batch_start"
    BATCH_COMPLETE = "batch_complete"
    PARTITION_START = "partition_start"
    PARTITION_COMPLETE = "partition_complete"

    # --- Diagnostics ---
    CHECKPOINT = "checkpoint"
    HEARTBEAT = "heartbeat"
    METRIC = "metric"
    LOG = "log"
107
+
108
+
109
@dataclass(frozen=True)
class ProgressContext:
    """Immutable description of where in an operation an event originated.

    Contexts can be nested: ``with_column`` and ``with_analyzer`` return a
    new context whose ``parent_context`` points at the one they were derived
    from.
    """

    operation_id: str = ""  # Unique operation identifier
    table_name: str = ""  # Current table
    column_name: str = ""  # Current column
    analyzer_name: str = ""  # Current analyzer
    batch_index: int = 0  # Batch number
    partition_index: int = 0  # Partition number
    parent_context: "ProgressContext | None" = None  # Parent for nesting
    tags: tuple[str, ...] = ()  # Custom tags for filtering

    def with_column(self, column: str) -> "ProgressContext":
        """Create a child context scoped to a column.

        The child keeps the operation, table, batch, partition and tags of
        this context. ``analyzer_name`` is cleared because a new column has
        no analyzer selected yet.

        Args:
            column: Name of the column the child context refers to.

        Returns:
            A new context with ``parent_context`` set to this instance.
        """
        return ProgressContext(
            operation_id=self.operation_id,
            table_name=self.table_name,
            column_name=column,
            # Carry batch/partition so children do not silently report 0.
            batch_index=self.batch_index,
            partition_index=self.partition_index,
            parent_context=self,
            tags=self.tags,
        )

    def with_analyzer(self, analyzer: str) -> "ProgressContext":
        """Create a child context scoped to an analyzer.

        The child keeps every other field of this context, including the
        current column, batch and partition.

        Args:
            analyzer: Name of the analyzer the child context refers to.

        Returns:
            A new context with ``parent_context`` set to this instance.
        """
        return ProgressContext(
            operation_id=self.operation_id,
            table_name=self.table_name,
            column_name=self.column_name,
            analyzer_name=analyzer,
            # Carry batch/partition so children do not silently report 0.
            batch_index=self.batch_index,
            partition_index=self.partition_index,
            parent_context=self,
            tags=self.tags,
        )

    def get_path(self) -> str:
        """Return ``table/column/analyzer``, skipping empty components.

        Returns:
            The non-empty names joined with ``/``; an empty string when none
            of the three names is set.
        """
        names = (self.table_name, self.column_name, self.analyzer_name)
        return "/".join(name for name in names if name)
156
+
157
+
158
@dataclass(frozen=True)
class ProgressMetrics:
    """Immutable snapshot of timing and throughput figures for an event."""

    elapsed_seconds: float = 0.0
    estimated_remaining_seconds: float | None = None
    rows_processed: int = 0
    rows_per_second: float = 0.0
    columns_completed: int = 0
    columns_total: int = 0
    memory_used_mb: float | None = None

    @property
    def columns_remaining(self) -> int:
        """Columns still outstanding; never negative."""
        outstanding = self.columns_total - self.columns_completed
        return max(0, outstanding)

    @property
    def throughput_string(self) -> str:
        """Row rate as text, scaled to M or K rows/s where large enough."""
        rate = self.rows_per_second
        # Largest unit first so the first matching threshold wins.
        for threshold, suffix in ((1_000_000, "M"), (1_000, "K")):
            if rate >= threshold:
                return f"{rate / threshold:.1f}{suffix} rows/s"
        return f"{rate:.0f} rows/s"
184
+
185
+
186
@dataclass(frozen=True)
class StandardProgressEvent:
    """Standard progress event with full context.

    This is the event object handed to callbacks; it bundles the event type
    and level with a progress fraction, a message, the originating context,
    metrics, a creation timestamp and free-form metadata.
    """

    event_type: EventType
    level: EventLevel = EventLevel.INFO
    progress: float = 0.0  # 0.0 to 1.0
    message: str = ""
    context: ProgressContext = field(default_factory=ProgressContext)
    metrics: ProgressMetrics = field(default_factory=ProgressMetrics)
    timestamp: datetime = field(default_factory=datetime.now)
    metadata: dict[str, Any] = field(default_factory=dict)

    @property
    def percent(self) -> float:
        """Progress expressed as a percentage (``progress * 100``)."""
        return self.progress * 100

    @property
    def is_complete(self) -> bool:
        """True for the terminal event types COMPLETE, FAIL and CANCEL."""
        return self.event_type in {EventType.COMPLETE, EventType.FAIL, EventType.CANCEL}

    @property
    def is_error(self) -> bool:
        """True when the level is ERROR or CRITICAL."""
        return self.level in {EventLevel.ERROR, EventLevel.CRITICAL}

    def to_dict(self) -> dict[str, Any]:
        """Convert the event to a plain dictionary for serialization.

        Returns:
            A dictionary with the event type value, level name, progress,
            message, nested ``context`` and ``metrics`` dictionaries, an
            ISO-formatted timestamp, and a shallow copy of ``metadata``.
        """
        context = self.context
        metrics = self.metrics
        return {
            "event_type": self.event_type.value,
            "level": self.level.name,
            "progress": self.progress,
            "message": self.message,
            "context": {
                "operation_id": context.operation_id,
                "table_name": context.table_name,
                "column_name": context.column_name,
                "analyzer_name": context.analyzer_name,
                "path": context.get_path(),
                "tags": list(context.tags),
                # Previously dropped from the serialized form.
                "batch_index": context.batch_index,
                "partition_index": context.partition_index,
            },
            "metrics": {
                "elapsed_seconds": metrics.elapsed_seconds,
                "estimated_remaining": metrics.estimated_remaining_seconds,
                "rows_processed": metrics.rows_processed,
                "rows_per_second": metrics.rows_per_second,
                "columns_completed": metrics.columns_completed,
                "columns_total": metrics.columns_total,
                # Previously dropped from the serialized form.
                "memory_used_mb": metrics.memory_used_mb,
            },
            "timestamp": self.timestamp.isoformat(),
            # Copy so callers editing the result cannot mutate this
            # frozen event's metadata through the shared reference.
            "metadata": dict(self.metadata),
        }
244
+
245
+
246
+ # =============================================================================
247
+ # Callback Protocol
248
+ # =============================================================================
249
+
250
+
251
@runtime_checkable
class ProgressCallback(Protocol):
    """Structural interface for synchronous progress callbacks.

    Any object exposing an ``on_progress`` method satisfies this protocol;
    no inheritance is required. Because the protocol is runtime-checkable,
    ``isinstance`` can be used to test for the method's presence.
    """

    def on_progress(self, event: StandardProgressEvent) -> None:
        """Receive one progress event.

        Args:
            event: The event being delivered.
        """
        ...
266
+
267
+
268
@runtime_checkable
class AsyncProgressCallback(Protocol):
    """Structural interface for asynchronous progress callbacks.

    Satisfied by any object that exposes an ``on_progress_async`` method.
    """

    async def on_progress_async(self, event: StandardProgressEvent) -> None:
        """Receive one progress event from a coroutine.

        Args:
            event: The event being delivered.
        """
        ...
279
+
280
+
281
@runtime_checkable
class LifecycleCallback(Protocol):
    """Structural interface for callbacks that are started and stopped.

    Satisfied by any object that exposes both ``start`` and ``stop``.
    """

    def start(self) -> None:
        """Hook invoked when profiling begins."""
        ...

    def stop(self) -> None:
        """Hook invoked when profiling finishes."""
        ...
292
+
293
+
294
+ # =============================================================================
295
+ # Base Callback Adapter
296
+ # =============================================================================
297
+
298
+
299
class CallbackAdapter(ABC):
    """Base class shared by all callback adapters.

    Handles the parts every adapter needs: a name, an on/off switch,
    filtering by level and event type, and a started/stopped flag.
    Subclasses supply the actual output in ``_handle_event``.
    """

    def __init__(
        self,
        *,
        name: str = "",
        enabled: bool = True,
        min_level: EventLevel = EventLevel.INFO,
        event_types: set[EventType] | None = None,
    ):
        """Set up the adapter's identity and filters.

        Args:
            name: Identifier for the adapter; falls back to the class name
                when empty.
            enabled: When False, every event is ignored.
            min_level: Events below this level are ignored.
            event_types: Event types to accept; None accepts all types.
        """
        self.name = name if name else self.__class__.__name__
        self.enabled = enabled
        self.min_level = min_level
        self.event_types = event_types
        self._started = False

    def should_handle(self, event: StandardProgressEvent) -> bool:
        """Decide whether an event passes this adapter's filters.

        Args:
            event: Candidate event.

        Returns:
            True when the adapter is enabled, the event's level is at least
            ``min_level``, and the event type is accepted.
        """
        if not self.enabled:
            return False

        # A falsy ``event_types`` (None or empty) places no restriction.
        return event.level.value >= self.min_level.value and (
            not self.event_types or event.event_type in self.event_types
        )

    def on_progress(self, event: StandardProgressEvent) -> None:
        """Filter the event and, if accepted, pass it to ``_handle_event``.

        Args:
            event: Incoming progress event.
        """
        if self.should_handle(event):
            self._handle_event(event)

    @abstractmethod
    def _handle_event(self, event: StandardProgressEvent) -> None:
        """Process an event that passed filtering; subclasses implement this.

        Args:
            event: Progress event to process.
        """

    def start(self) -> None:
        """Mark the adapter as started."""
        self._started = True

    def stop(self) -> None:
        """Mark the adapter as stopped."""
        self._started = False
375
+
376
+
377
+ # =============================================================================
378
+ # Console Adapters
379
+ # =============================================================================
380
+
381
+
382
def _default_level_colors() -> dict[EventLevel, str]:
    """Build a fresh level-to-ANSI-escape mapping for ``ConsoleStyle``."""
    return {
        EventLevel.DEBUG: "\033[90m",  # Gray
        EventLevel.INFO: "\033[0m",  # Default
        EventLevel.NOTICE: "\033[94m",  # Blue
        EventLevel.WARNING: "\033[93m",  # Yellow
        EventLevel.ERROR: "\033[91m",  # Red
        EventLevel.CRITICAL: "\033[91;1m",  # Bold Red
    }


@dataclass
class ConsoleStyle:
    """Appearance settings for console progress output."""

    # Progress bar geometry and glyphs
    bar_width: int = 40
    bar_fill: str = "█"
    bar_empty: str = "░"

    # Optional segments and behaviors
    show_eta: bool = True
    show_column: bool = True
    show_throughput: bool = True
    color_enabled: bool = True
    clear_on_complete: bool = True

    # ANSI escape sequences: one per level, plus the reset sequence
    colors: dict[EventLevel, str] = field(default_factory=_default_level_colors)
    reset: str = "\033[0m"
405
+
406
+
407
class ConsoleAdapter(CallbackAdapter):
    """Console output adapter with progress bar.

    PROGRESS events are drawn as a single-line bar that is redrawn in place
    using a carriage return. Terminal events (complete/fail/cancel) print a
    summary line, and every other event prints its message. Text is wrapped
    in ANSI color codes only when ``style.color_enabled`` is True.

    Example:
        adapter = ConsoleAdapter(style=ConsoleStyle(bar_width=50))
        tracker.add_callback(adapter)
    """

    def __init__(
        self,
        *,
        style: ConsoleStyle | None = None,
        stream: Any = None,  # TextIO
        **kwargs: Any,
    ):
        """Initialize console adapter.

        Args:
            style: Console styling configuration; a default ``ConsoleStyle``
                is created when omitted.
            stream: Output stream (default: sys.stderr)
            **kwargs: Base adapter arguments
        """
        super().__init__(**kwargs)
        self.style = style or ConsoleStyle()
        self._stream = stream
        # Length of the last bar line drawn; 0 means no bar is on screen.
        self._last_line_length = 0

    @property
    def stream(self) -> Any:
        """Output stream; resolves to ``sys.stderr`` when none was given."""
        if self._stream is None:
            # Looked up on each access rather than captured at construction.
            import sys
            return sys.stderr
        return self._stream

    def _handle_event(self, event: StandardProgressEvent) -> None:
        """Route the event to the bar, completion, or message renderer."""
        if event.event_type == EventType.PROGRESS:
            self._render_progress_bar(event)
        elif event.is_complete:
            self._render_completion(event)
        else:
            self._render_message(event)

    def _render_progress_bar(self, event: StandardProgressEvent) -> None:
        """Draw (or redraw in place) the progress bar line.

        Args:
            event: PROGRESS event supplying the fraction, column and metrics.
        """
        s = self.style

        # Clamp so an out-of-range fraction cannot make the bar longer or
        # shorter than ``bar_width``; the printed percentage stays as given.
        fraction = min(1.0, max(0.0, event.progress))
        filled = int(fraction * s.bar_width)
        bar = s.bar_fill * filled + s.bar_empty * (s.bar_width - filled)

        parts = [f"\r[{bar}] {event.percent:5.1f}%"]

        if s.show_column and event.context.column_name:
            parts.append(f" | {event.context.column_name}")

        if s.show_throughput and event.metrics.rows_per_second > 0:
            parts.append(f" | {event.metrics.throughput_string}")

        if s.show_eta and event.metrics.estimated_remaining_seconds is not None:
            eta = self._format_time(event.metrics.estimated_remaining_seconds)
            parts.append(f" | ETA: {eta}")

        line = "".join(parts)

        # Pad with spaces so a shorter line fully overwrites the previous one.
        if len(line) < self._last_line_length:
            line += " " * (self._last_line_length - len(line))
        self._last_line_length = len(line)

        print(line, end="", flush=True, file=self.stream)

    def _render_completion(self, event: StandardProgressEvent) -> None:
        """Print the final status line for a terminal event.

        Args:
            event: COMPLETE, FAIL or CANCEL event.
        """
        if self._last_line_length:
            if self.style.clear_on_complete:
                # Blank out the bar and return the cursor to column 0.
                print("\r" + " " * self._last_line_length + "\r", end="", file=self.stream)
            else:
                # Keep the bar visible; end its line so the status text
                # is not appended directly after it.
                print(file=self.stream)
            self._last_line_length = 0

        elapsed = self._format_time(event.metrics.elapsed_seconds)

        if event.event_type == EventType.COMPLETE:
            msg = f"✓ Complete in {elapsed}"
        elif event.event_type == EventType.FAIL:
            msg = f"✗ Failed: {event.message}"
        else:
            msg = f"○ Cancelled after {elapsed}"

        print(self._colorize(msg, event.level), file=self.stream)

    def _render_message(self, event: StandardProgressEvent) -> None:
        """Print the event's message, colored by level when enabled."""
        print(self._colorize(event.message, event.level), file=self.stream)

    def _colorize(self, text: str, level: EventLevel) -> str:
        """Wrap text in the level's color and the reset code, if enabled.

        Args:
            text: Text to print.
            level: Level used to select the color.

        Returns:
            ``text`` unchanged when colors are disabled, so no escape
            sequences reach the stream; otherwise color + text + reset.
        """
        if not self.style.color_enabled:
            return text
        return f"{self._get_color(level)}{text}{self.style.reset}"

    def _get_color(self, level: EventLevel) -> str:
        """Return the ANSI color for a level, or "" if disabled or unmapped."""
        if not self.style.color_enabled:
            return ""
        return self.style.colors.get(level, "")

    def _format_time(self, seconds: float) -> str:
        """Format a duration as ``Ns``, ``Mm Ss`` or ``Hh Mm``.

        Args:
            seconds: Duration in seconds.

        Returns:
            Whole seconds below one minute, minutes and seconds below one
            hour, otherwise hours and minutes.
        """
        if seconds < 60:
            return f"{seconds:.0f}s"
        elif seconds < 3600:
            mins = int(seconds // 60)
            secs = int(seconds % 60)
            return f"{mins}m {secs}s"
        else:
            hours = int(seconds // 3600)
            mins = int((seconds % 3600) // 60)
            return f"{hours}h {mins}m"
524
+
525
+
526
class MinimalConsoleAdapter(CallbackAdapter):
    """Minimal console output showing only milestones.

    Useful for environments where minimal output is preferred.
    Lines are written with ``print`` to standard output.
    """

    def __init__(
        self,
        *,
        show_columns: bool = False,
        milestone_interval: int = 10,  # Show every N%
        **kwargs: Any,
    ):
        """Initialize minimal console adapter.

        Args:
            show_columns: Also print a line for each completed column
            milestone_interval: Print progress every N percent
            **kwargs: Base adapter arguments

        Raises:
            ValueError: If milestone_interval is not positive
        """
        # A zero interval would raise ZeroDivisionError on every progress
        # event; reject it up front where the mistake is visible.
        if milestone_interval <= 0:
            raise ValueError(
                f"milestone_interval must be positive, got {milestone_interval!r}"
            )
        super().__init__(**kwargs)
        self.show_columns = show_columns
        self.milestone_interval = milestone_interval
        self._last_milestone = -1

    def _handle_event(self, event: StandardProgressEvent) -> None:
        """Handle event with minimal output."""
        if event.event_type == EventType.START:
            # Forget milestones of a previous run so a reused adapter
            # reports progress again from 0%.
            self._last_milestone = -1
            print("Starting profiling...")
        elif event.event_type == EventType.COMPLETE:
            elapsed = event.metrics.elapsed_seconds
            print(f"Completed in {elapsed:.1f}s")
        elif event.event_type == EventType.FAIL:
            print(f"Failed: {event.message}")
        elif event.event_type == EventType.PROGRESS:
            # Round down to the nearest multiple of the interval.
            milestone = int(event.percent // self.milestone_interval) * self.milestone_interval
            if milestone > self._last_milestone:
                self._last_milestone = milestone
                print(f"Progress: {milestone}%")
        elif self.show_columns and event.event_type == EventType.COLUMN_COMPLETE:
            print(f" Completed: {event.context.column_name}")
560
+
561
+
562
+ # =============================================================================
563
+ # Logging Adapter
564
+ # =============================================================================
565
+
566
+
567
class LoggingAdapter(CallbackAdapter):
    """Logging framework adapter.

    Routes progress events to Python's logging framework.

    Example:
        adapter = LoggingAdapter(
            logger_name="profiler.progress",
            min_level=EventLevel.INFO,
        )
    """

    # Map event levels to logging levels (NOTICE has no stdlib
    # counterpart and is logged as INFO).
    LEVEL_MAP = {
        EventLevel.DEBUG: logging.DEBUG,
        EventLevel.INFO: logging.INFO,
        EventLevel.NOTICE: logging.INFO,
        EventLevel.WARNING: logging.WARNING,
        EventLevel.ERROR: logging.ERROR,
        EventLevel.CRITICAL: logging.CRITICAL,
    }

    def __init__(
        self,
        *,
        logger_name: str = "truthound.progress",
        logger: logging.Logger | None = None,
        include_context: bool = True,
        include_metrics: bool = True,
        **kwargs: Any,
    ):
        """Initialize logging adapter.

        Args:
            logger_name: Logger name to use
            logger: Existing logger instance (takes precedence over
                logger_name)
            include_context: Include context in log extras
            include_metrics: Include metrics in log extras
            **kwargs: Base adapter arguments
        """
        super().__init__(**kwargs)
        self._logger = logger or logging.getLogger(logger_name)
        self.include_context = include_context
        self.include_metrics = include_metrics

    def _handle_event(self, event: StandardProgressEvent) -> None:
        """Log the event, attaching structured data as ``extra`` fields."""
        # Levels missing from the map are logged as INFO.
        level = self.LEVEL_MAP.get(event.level, logging.INFO)

        extra: dict[str, Any] = {
            "event_type": event.event_type.value,
            "progress": event.progress,
        }

        if self.include_context:
            extra["context"] = {
                "operation": event.context.operation_id,
                "table": event.context.table_name,
                "column": event.context.column_name,
                "path": event.context.get_path(),
            }

        if self.include_metrics:
            extra["metrics"] = {
                "elapsed": event.metrics.elapsed_seconds,
                "rows": event.metrics.rows_processed,
                "throughput": event.metrics.rows_per_second,
            }

        message = self._format_message(event)
        self._logger.log(level, message, extra=extra)

    def _format_message(self, event: StandardProgressEvent) -> str:
        """Format log message.

        The event's own message wins when present; otherwise a default
        text is derived from the event type.
        """
        if event.message:
            return event.message

        # From here on event.message is known to be empty, so the
        # fallbacks must not interpolate it.
        kind = event.event_type
        if kind == EventType.PROGRESS:
            return f"Progress: {event.percent:.1f}%"
        if kind == EventType.COLUMN_START:
            return f"Starting column: {event.context.column_name}"
        if kind == EventType.COLUMN_COMPLETE:
            return f"Completed column: {event.context.column_name}"
        if kind == EventType.COMPLETE:
            return f"Profiling complete ({event.metrics.elapsed_seconds:.1f}s)"
        if kind == EventType.FAIL:
            # Fall back to the error text carried in the event metadata,
            # if any, instead of printing a dangling "Profiling failed: ".
            metadata = getattr(event, "metadata", None)
            error = metadata.get("error") if isinstance(metadata, dict) else None
            return f"Profiling failed: {error}" if error else "Profiling failed"
        return f"{kind.value}: No message"
656
+
657
+
658
+ # =============================================================================
659
+ # File Adapter
660
+ # =============================================================================
661
+
662
+
663
@dataclass
class FileOutputConfig:
    """Configuration for file output."""

    # Output format name. NOTE(review): FileAdapter in this module only
    # branches on "jsonl" and "json"; confirm where "csv" is handled.
    format: str = "jsonl"  # jsonl, json, csv
    # NOTE(review): not consulted by FileAdapter in this module; confirm
    # the intended consumer.
    include_all_events: bool = True
    # Presumably the size threshold (in MB) for rotating the output
    # file; TODO confirm, no rotation logic is visible in FileAdapter.
    rotate_size_mb: int = 100
    # Presumably whether rotated files get compressed; TODO confirm.
    compress_rotated: bool = True
    # Text encoding passed to open() when the output file is written.
    encoding: str = "utf-8"
672
+
673
+
674
class FileAdapter(CallbackAdapter):
    """File output adapter.

    Writes progress events to a file. With the "jsonl" format each event
    is appended as one JSON line as it arrives; with "json" events are
    collected in memory and written as a single JSON array on stop().
    Other format values produce no output here.

    Example:
        adapter = FileAdapter(
            path="profiling_progress.jsonl",
            config=FileOutputConfig(format="jsonl"),
        )
    """

    def __init__(
        self,
        path: str | Path,
        *,
        config: FileOutputConfig | None = None,
        **kwargs: Any,
    ):
        """Initialize file adapter.

        Args:
            path: Output file path
            config: File output configuration
            **kwargs: Base adapter arguments
        """
        super().__init__(**kwargs)
        self.path = Path(path)
        self.config = config or FileOutputConfig()
        self._file: Any = None
        self._events: list[dict[str, Any]] = []

    def start(self) -> None:
        """Open file for writing (jsonl) or reset the in-memory event list."""
        super().start()
        if self.config.format == "jsonl":
            # A repeated start() must not leak the handle opened earlier.
            self._close_file()
            self._file = open(self.path, "a", encoding=self.config.encoding)
        else:
            self._events = []

    def stop(self) -> None:
        """Close file and, for the "json" format, write collected events."""
        try:
            super().stop()
        finally:
            # Release the handle even if the base class stop() fails.
            self._close_file()

        if self.config.format == "json" and self._events:
            with open(self.path, "w", encoding=self.config.encoding) as f:
                json.dump(self._events, f, indent=2)

    def _close_file(self) -> None:
        """Close the open jsonl handle, if any."""
        if self._file:
            try:
                self._file.close()
            finally:
                self._file = None

    def _handle_event(self, event: StandardProgressEvent) -> None:
        """Write event to file."""
        event_dict = event.to_dict()

        if self.config.format == "jsonl" and self._file:
            self._file.write(json.dumps(event_dict) + "\n")
            # Flush per event so the file can be followed while running.
            self._file.flush()
        elif self.config.format == "json":
            self._events.append(event_dict)
734
+
735
+
736
+ # =============================================================================
737
+ # Callback Chain
738
+ # =============================================================================
739
+
740
+
741
class CallbackChain:
    """Chains multiple callbacks together.

    Events are dispatched to all callbacks in the chain, in insertion
    order. Supports adding/removing callbacks dynamically, including
    from inside a callback while an event is being dispatched.

    Example:
        chain = CallbackChain()
        chain.add(console_adapter)
        chain.add(logging_adapter)
        chain.add(file_adapter)

        # Use as single callback
        profiler.profile(data, progress_callback=chain)
    """

    def __init__(
        self,
        callbacks: Sequence[CallbackAdapter] | None = None,
        *,
        stop_on_error: bool = False,
    ):
        """Initialize callback chain.

        Args:
            callbacks: Initial callbacks
            stop_on_error: Stop chain on callback error
        """
        self._callbacks: list[CallbackAdapter] = list(callbacks or [])
        self.stop_on_error = stop_on_error
        self._errors: list[tuple[CallbackAdapter, Exception]] = []

    def add(self, callback: CallbackAdapter) -> "CallbackChain":
        """Add callback to chain.

        Args:
            callback: Callback to add

        Returns:
            Self for chaining
        """
        self._callbacks.append(callback)
        return self

    def remove(self, callback: CallbackAdapter) -> bool:
        """Remove callback from chain.

        Args:
            callback: Callback to remove

        Returns:
            True if callback was removed
        """
        try:
            self._callbacks.remove(callback)
        except ValueError:
            return False
        return True

    def clear(self) -> None:
        """Remove all callbacks."""
        self._callbacks.clear()

    def on_progress(self, event: StandardProgressEvent) -> None:
        """Dispatch event to all callbacks.

        Callback errors are recorded in ``errors``; they are re-raised
        only when ``stop_on_error`` is set.

        Args:
            event: Event to dispatch
        """
        # Iterate over a snapshot: a callback that adds or removes chain
        # members while handling the event must not make the loop skip
        # or repeat other callbacks.
        for callback in tuple(self._callbacks):
            try:
                callback.on_progress(event)
            except Exception as e:
                self._errors.append((callback, e))
                if self.stop_on_error:
                    raise

    def start(self) -> None:
        """Start all callbacks."""
        for callback in tuple(self._callbacks):
            if hasattr(callback, "start"):
                callback.start()

    def stop(self) -> None:
        """Stop all callbacks.

        Every callback gets its stop() call even if an earlier one fails,
        so resources held by later callbacks are still released. Failures
        are recorded in ``errors`` and the first one is re-raised once
        all callbacks have been stopped.
        """
        first_error: Exception | None = None
        for callback in tuple(self._callbacks):
            if not hasattr(callback, "stop"):
                continue
            try:
                callback.stop()
            except Exception as e:
                self._errors.append((callback, e))
                if first_error is None:
                    first_error = e
        if first_error is not None:
            raise first_error

    @property
    def errors(self) -> list[tuple[CallbackAdapter, Exception]]:
        """Get errors from callbacks (a copy; safe to mutate)."""
        return self._errors.copy()

    def __len__(self) -> int:
        return len(self._callbacks)

    def __iter__(self) -> Iterator[CallbackAdapter]:
        return iter(self._callbacks)
840
+
841
+
842
+ # =============================================================================
843
+ # Filtering and Throttling
844
+ # =============================================================================
845
+
846
+
847
@dataclass
class FilterConfig:
    """Configuration for event filtering."""

    # Events whose level value is below this level's value are dropped.
    min_level: EventLevel = EventLevel.INFO
    # When set (and non-empty), only these event types pass.
    event_types: set[EventType] | None = None
    # When set, an event passes only if it carries at least one of these tags.
    include_tags: set[str] | None = None
    # When set, an event carrying any of these tags is dropped.
    exclude_tags: set[str] | None = None
    # Glob patterns for column names; None means no column restriction.
    column_patterns: list[str] | None = None  # Glob patterns
    # Presumably glob patterns for table names, mirroring column_patterns;
    # TODO confirm.
    table_patterns: list[str] | None = None
857
+
858
+
859
class FilteringAdapter(CallbackAdapter):
    """Filtering wrapper for callbacks.

    Filters events based on configurable criteria before
    passing to the wrapped callback.

    Example:
        # Only log column events
        filtered = FilteringAdapter(
            wrapped=LoggingAdapter(),
            config=FilterConfig(
                event_types={EventType.COLUMN_START, EventType.COLUMN_COMPLETE},
            ),
        )
    """

    def __init__(
        self,
        wrapped: CallbackAdapter,
        config: FilterConfig | None = None,
        **kwargs: Any,
    ):
        """Initialize filtering adapter.

        Args:
            wrapped: Wrapped callback
            config: Filter configuration
            **kwargs: Base adapter arguments
        """
        super().__init__(**kwargs)
        self.wrapped = wrapped
        self.filter_config = config or FilterConfig()

    def should_handle(self, event: StandardProgressEvent) -> bool:
        """Apply filtering rules.

        An event passes only if the base adapter accepts it and it
        satisfies every configured criterion: minimum level, event type,
        include/exclude tags, and column/table name patterns.

        Args:
            event: Event to test

        Returns:
            True if the event should be forwarded
        """
        if not super().should_handle(event):
            return False

        fc = self.filter_config

        # Level check
        if event.level.value < fc.min_level.value:
            return False

        # Event type check
        if fc.event_types and event.event_type not in fc.event_types:
            return False

        # Tag checks
        if fc.include_tags and not any(
            tag in event.context.tags for tag in fc.include_tags
        ):
            return False

        if fc.exclude_tags and any(
            tag in event.context.tags for tag in fc.exclude_tags
        ):
            return False

        # Name pattern checks (configured but previously never applied)
        if fc.column_patterns and not self._matches_any(
            event.context.column_name, fc.column_patterns
        ):
            return False

        if fc.table_patterns and not self._matches_any(
            event.context.table_name, fc.table_patterns
        ):
            return False

        return True

    @staticmethod
    def _matches_any(name: str | None, patterns: list[str]) -> bool:
        """Return True if name matches at least one glob pattern.

        Events that carry no name (e.g. lifecycle events without a
        column) are not restricted by the patterns and always match.
        """
        if not name:
            return True
        # Local import: the module's import block is not touched here.
        from fnmatch import fnmatchcase

        # fnmatchcase keeps matching case-sensitive on every platform.
        return any(fnmatchcase(name, pattern) for pattern in patterns)

    def _handle_event(self, event: StandardProgressEvent) -> None:
        """Pass event to wrapped callback."""
        self.wrapped.on_progress(event)

    def start(self) -> None:
        """Start wrapped callback."""
        super().start()
        if hasattr(self.wrapped, "start"):
            self.wrapped.start()

    def stop(self) -> None:
        """Stop wrapped callback."""
        super().stop()
        if hasattr(self.wrapped, "stop"):
            self.wrapped.stop()
933
+
934
+
935
@dataclass
class ThrottleConfig:
    """Configuration for event throttling."""

    # Throttled events arriving sooner than this after the last forwarded
    # event are dropped.
    min_interval_ms: int = 100  # Minimum ms between events
    # Upper bound on events forwarded within any one-second window.
    max_events_per_second: int = 10  # Max events per second
    # Event types that ThrottlingAdapter forwards without applying the
    # limits above; a fresh set is built per instance via default_factory.
    always_emit_types: set[EventType] = field(default_factory=lambda: {
        EventType.START, EventType.COMPLETE, EventType.FAIL,
        EventType.COLUMN_START, EventType.COLUMN_COMPLETE,
    })
945
+
946
+
947
class ThrottlingAdapter(CallbackAdapter):
    """Throttling wrapper for callbacks.

    Limits the rate of events passed to the wrapped callback.
    Always passes lifecycle events regardless of throttle.

    Example:
        # Limit console updates to 5 per second
        throttled = ThrottlingAdapter(
            wrapped=ConsoleAdapter(),
            config=ThrottleConfig(max_events_per_second=5),
        )
    """

    def __init__(
        self,
        wrapped: CallbackAdapter,
        config: ThrottleConfig | None = None,
        **kwargs: Any,
    ):
        """Initialize throttling adapter.

        Args:
            wrapped: Wrapped callback
            config: Throttle configuration
            **kwargs: Base adapter arguments
        """
        super().__init__(**kwargs)
        self.wrapped = wrapped
        self.throttle_config = config or ThrottleConfig()
        # -inf means "nothing emitted yet": the first event always
        # satisfies the minimum-interval check.
        self._last_emit_time: float = float("-inf")
        # Emit timestamps of the last second. Unbounded on purpose: a
        # fixed maxlen would silently disable limits above that length.
        self._event_times: deque[float] = deque()

    def _handle_event(self, event: StandardProgressEvent) -> None:
        """Handle event with throttling."""
        tc = self.throttle_config
        # Monotonic clock: interval arithmetic must not be affected by
        # system clock adjustments.
        now = time.monotonic()

        # Always emit certain event types
        if event.event_type in tc.always_emit_types:
            self._emit(event, now)
            return

        # Check minimum interval
        interval_ms = (now - self._last_emit_time) * 1000
        if interval_ms < tc.min_interval_ms:
            return

        # Check rate limit
        self._drop_expired(now)
        if len(self._event_times) >= tc.max_events_per_second:
            return

        self._emit(event, now)

    def _drop_expired(self, now: float) -> None:
        """Discard emit timestamps older than one second."""
        cutoff = now - 1.0  # Last second
        while self._event_times and self._event_times[0] < cutoff:
            self._event_times.popleft()

    def _emit(self, event: StandardProgressEvent, now: float) -> None:
        """Emit event to wrapped callback."""
        self._last_emit_time = now
        # Prune here as well so the window stays bounded when only
        # always-emit events arrive.
        self._drop_expired(now)
        self._event_times.append(now)
        self.wrapped.on_progress(event)

    def start(self) -> None:
        """Reset throttle state and start wrapped callback."""
        super().start()
        self._last_emit_time = float("-inf")
        self._event_times.clear()
        if hasattr(self.wrapped, "start"):
            self.wrapped.start()

    def stop(self) -> None:
        """Stop wrapped callback."""
        super().stop()
        if hasattr(self.wrapped, "stop"):
            self.wrapped.stop()
1024
+
1025
+
1026
+ # =============================================================================
1027
+ # Buffering and Batching
1028
+ # =============================================================================
1029
+
1030
+
1031
@dataclass
class BufferConfig:
    """Configuration for event buffering."""

    # The buffer is flushed once it holds at least this many events.
    max_size: int = 100  # Max events in buffer
    # The buffer is flushed when this much time has passed since the last
    # flush; BufferingAdapter checks this only when an event arrives.
    flush_interval_seconds: float = 5.0  # Auto-flush interval
    # When True, an event flagged is_complete triggers an immediate flush.
    flush_on_complete: bool = True  # Flush on completion events
1038
+
1039
+
1040
class BufferingAdapter(CallbackAdapter):
    """Buffering wrapper for callbacks.

    Buffers events and flushes them in batches to reduce
    callback overhead. Flushes are triggered by incoming events (size,
    completion, or elapsed interval) and by stop(); there is no
    background timer.

    Example:
        # Buffer events and flush every 100 events or 5 seconds
        buffered = BufferingAdapter(
            wrapped=FileAdapter("events.jsonl"),
            config=BufferConfig(max_size=100, flush_interval_seconds=5),
        )
    """

    def __init__(
        self,
        wrapped: CallbackAdapter,
        config: BufferConfig | None = None,
        **kwargs: Any,
    ):
        """Initialize buffering adapter.

        Args:
            wrapped: Wrapped callback
            config: Buffer configuration
            **kwargs: Base adapter arguments
        """
        super().__init__(**kwargs)
        self.wrapped = wrapped
        self.buffer_config = config or BufferConfig()
        self._buffer: list[StandardProgressEvent] = []
        # Start the interval from "now"; a zero timestamp would make the
        # very first event look overdue and flush immediately.
        self._last_flush: float = time.monotonic()
        self._lock = threading.Lock()

    def start(self) -> None:
        """Start wrapped callback and restart the flush interval.

        Mirrors the other wrapping adapters: without this the wrapped
        callback (e.g. a FileAdapter) would never be started.
        """
        super().start()
        with self._lock:
            self._last_flush = time.monotonic()
        if hasattr(self.wrapped, "start"):
            self.wrapped.start()

    def _handle_event(self, event: StandardProgressEvent) -> None:
        """Buffer event and flush when a flush condition is met."""
        bc = self.buffer_config

        with self._lock:
            self._buffer.append(event)

            now = time.monotonic()
            if (
                len(self._buffer) >= bc.max_size
                or (bc.flush_on_complete and event.is_complete)
                or now - self._last_flush >= bc.flush_interval_seconds
            ):
                self._flush()

    def _flush(self) -> None:
        """Flush buffer to wrapped callback.

        Must be called with ``self._lock`` held. Events are delivered
        under the lock to preserve their order, so the wrapped callback
        must not call back into this adapter.
        """
        if not self._buffer:
            return

        # Detach the batch first so the buffer is empty even if the
        # wrapped callback raises part-way through.
        events = self._buffer
        self._buffer = []
        self._last_flush = time.monotonic()

        for event in events:
            self.wrapped.on_progress(event)

    def stop(self) -> None:
        """Flush and stop."""
        try:
            with self._lock:
                self._flush()
        finally:
            # Shut down even if the final flush failed, so the wrapped
            # callback can release its resources.
            try:
                super().stop()
            finally:
                if hasattr(self.wrapped, "stop"):
                    self.wrapped.stop()
1114
+
1115
+
1116
+ # =============================================================================
1117
+ # Async Adapter
1118
+ # =============================================================================
1119
+
1120
+
1121
class AsyncAdapter(CallbackAdapter):
    """Async callback adapter.

    Processes events asynchronously: events are queued by the caller and
    a background daemon thread runs the async handler for each one on an
    event loop, in arrival order.

    Example:
        async def handle_event(event):
            await send_to_service(event)

        adapter = AsyncAdapter(async_handler=handle_event)
    """

    def __init__(
        self,
        async_handler: Callable[[StandardProgressEvent], Any],
        *,
        loop: asyncio.AbstractEventLoop | None = None,
        **kwargs: Any,
    ):
        """Initialize async adapter.

        Args:
            async_handler: Async function to handle events
            loop: Event loop to use; when omitted a private loop is
                created (and closed) by the worker thread
            **kwargs: Base adapter arguments
        """
        super().__init__(**kwargs)
        self._handler = async_handler
        self._loop = loop
        self._queue: queue.Queue[StandardProgressEvent | None] = queue.Queue()
        self._thread: threading.Thread | None = None
        self._running = False

    def start(self) -> None:
        """Start async processing thread."""
        super().start()
        if self._running and self._thread is not None and self._thread.is_alive():
            return  # Already started; keep the existing worker.

        # Fresh queue per run: a leftover stop sentinel from an earlier
        # run must not terminate the new worker immediately.
        self._queue = queue.Queue()
        self._running = True
        self._thread = threading.Thread(
            target=self._run_loop, args=(self._queue,), daemon=True
        )
        self._thread.start()

    def stop(self) -> None:
        """Stop async processing.

        Events queued before this call are still delivered; the call
        waits up to five seconds for the worker to drain them.
        """
        self._running = False  # Refuse new events from now on.
        worker = self._thread
        if worker is not None:
            self._queue.put(None)  # Signal to stop
            worker.join(timeout=5.0)
        super().stop()

    def _handle_event(self, event: StandardProgressEvent) -> None:
        """Queue event for async processing."""
        if self._running:
            self._queue.put(event)

    def _run_loop(
        self,
        pending: "queue.Queue[StandardProgressEvent | None] | None" = None,
    ) -> None:
        """Run event loop in background thread.

        Args:
            pending: Queue to consume; defaults to the adapter's current
                queue.
        """
        if pending is None:
            pending = self._queue

        owns_loop = self._loop is None
        loop = asyncio.new_event_loop() if owns_loop else self._loop
        asyncio.set_event_loop(loop)

        try:
            # Consume until the sentinel rather than polling a flag, so
            # events queued before stop() are not dropped.
            while True:
                event = pending.get()
                if event is None:
                    break
                try:
                    self._run_handler(loop, event)
                except Exception:
                    # One failing event must not end the worker, but the
                    # failure should not vanish silently either.
                    logging.getLogger(__name__).exception(
                        "Async progress handler failed"
                    )
        finally:
            # Only close a loop this thread created; a caller-supplied
            # loop belongs to the caller.
            if owns_loop:
                loop.close()

    def _run_handler(
        self,
        loop: asyncio.AbstractEventLoop,
        event: StandardProgressEvent,
    ) -> None:
        """Run the handler for one event and wait for it to finish."""
        coro = self._handler(event)
        if loop.is_running():
            # The supplied loop is being run by another thread:
            # run_until_complete() would raise, so hand the coroutine
            # over thread-safely and wait for its result.
            asyncio.run_coroutine_threadsafe(coro, loop).result()
        else:
            loop.run_until_complete(coro)
1191
+
1192
+
1193
+ # =============================================================================
1194
+ # Registry Pattern
1195
+ # =============================================================================
1196
+
1197
+
1198
class CallbackRegistry:
    """Name-based registry of callback adapters.

    Adapters can be registered as classes (via the ``register``
    decorator) or as factory functions, and instantiated by name.

    Example:
        registry = CallbackRegistry()

        # Register custom callback
        @registry.register("custom")
        class CustomAdapter(CallbackAdapter):
            ...

        # Create callback by name
        callback = registry.create("console", bar_width=50)
    """

    _instance: "CallbackRegistry | None" = None

    def __init__(self) -> None:
        self._adapters: dict[str, type[CallbackAdapter]] = {}
        self._factories: dict[str, Callable[..., CallbackAdapter]] = {}

        # Make the adapters shipped with this module available by name.
        self._register_builtin()

    @classmethod
    def get_instance(cls) -> "CallbackRegistry":
        """Return the shared registry, creating it on first use."""
        shared = cls._instance
        if shared is None:
            shared = cls._instance = cls()
        return shared

    def _register_builtin(self) -> None:
        """Register the adapters defined in this module."""
        self._adapters.update(
            {
                "console": ConsoleAdapter,
                "minimal_console": MinimalConsoleAdapter,
                "logging": LoggingAdapter,
                "file": FileAdapter,
            }
        )

    def register(
        self,
        name: str,
    ) -> Callable[[type[CallbackAdapter]], type[CallbackAdapter]]:
        """Build a class decorator that registers an adapter under ``name``.

        Args:
            name: Registration name

        Returns:
            Decorator that stores the class and returns it unchanged
        """
        def remember(adapter_cls: type[CallbackAdapter]) -> type[CallbackAdapter]:
            self._adapters[name] = adapter_cls
            return adapter_cls

        return remember

    def register_factory(
        self,
        name: str,
        factory: Callable[..., CallbackAdapter],
    ) -> None:
        """Register a factory function under ``name``.

        Args:
            name: Registration name
            factory: Callable invoked with the ``create`` keyword arguments
        """
        self._factories[name] = factory

    def create(self, name: str, **kwargs: Any) -> CallbackAdapter:
        """Instantiate the callback registered under ``name``.

        Factories take precedence over adapter classes of the same name.

        Args:
            name: Registered name
            **kwargs: Arguments forwarded to the factory or class

        Returns:
            Created callback

        Raises:
            KeyError: If name not registered
        """
        for table in (self._factories, self._adapters):
            if name in table:
                return table[name](**kwargs)

        raise KeyError(f"Unknown callback adapter: {name}")

    def list_adapters(self) -> list[str]:
        """Return all registered names (classes and factories), sorted."""
        return sorted({*self._adapters, *self._factories})

    def get_adapter_class(self, name: str) -> type[CallbackAdapter] | None:
        """Return the adapter class registered under ``name``, if any."""
        return self._adapters.get(name)
1296
+
1297
+
1298
+ # =============================================================================
1299
+ # Event Emitter
1300
+ # =============================================================================
1301
+
1302
+
1303
class ProgressEmitter:
    """Emits standardized progress events.

    This class is used by profilers to emit progress events
    in a standardized format.

    Example:
        emitter = ProgressEmitter(
            callback=chain,
            operation_id="prof_001",
            table_name="users",
            total_columns=10,
        )

        emitter.start()
        for col in columns:
            emitter.column_start(col)
            # ... profile
            emitter.column_complete(col)
        emitter.complete()
    """

    def __init__(
        self,
        callback: ProgressCallback | CallbackChain | None = None,
        *,
        operation_id: str = "",
        table_name: str = "",
        total_columns: int = 0,
        total_rows: int | None = None,
    ):
        """Initialize emitter.

        Args:
            callback: Callback to receive events
            operation_id: Unique operation identifier; generated when empty
            table_name: Name of table being profiled
            total_columns: Total columns to profile
            total_rows: Total rows (if known)
        """
        self._callback = callback
        self._context = ProgressContext(
            operation_id=operation_id or self._generate_id(),
            table_name=table_name,
        )
        self._total_columns = total_columns
        self._total_rows = total_rows
        self._completed_columns = 0
        self._rows_processed = 0
        self._start_time: datetime | None = None
        self._current_column: str | None = None

    def _generate_id(self) -> str:
        """Generate unique operation ID ("op_" plus 8 hex characters)."""
        import uuid
        return f"op_{uuid.uuid4().hex[:8]}"

    def start(self, message: str = "Starting profiling") -> None:
        """Emit start event and begin measuring elapsed time."""
        self._start_time = datetime.now()
        self._emit(StandardProgressEvent(
            event_type=EventType.START,
            level=EventLevel.NOTICE,
            progress=0.0,
            message=message,
            context=self._context,
            metrics=self._build_metrics(),
        ))

    def column_start(self, column: str) -> None:
        """Emit column start event."""
        self._current_column = column
        context = self._context.with_column(column)

        self._emit(StandardProgressEvent(
            event_type=EventType.COLUMN_START,
            level=EventLevel.INFO,
            progress=self._calculate_progress(),
            message=f"Starting column: {column}",
            context=context,
            metrics=self._build_metrics(),
        ))

    def column_progress(
        self,
        column: str,
        progress: float,
        *,
        rows: int = 0,
        analyzer: str | None = None,
    ) -> None:
        """Emit column progress event.

        Args:
            column: Column being profiled
            progress: Progress within this column (0.0 to 1.0)
            rows: Rows processed since the previous call (added to the
                running total)
            analyzer: Name of the analyzer currently running, if any
        """
        self._rows_processed += rows
        context = self._context.with_column(column)
        if analyzer:
            context = context.with_analyzer(analyzer)

        self._emit(StandardProgressEvent(
            event_type=EventType.COLUMN_PROGRESS,
            level=EventLevel.DEBUG,
            progress=self._calculate_progress(progress),
            message=f"Profiling {column}" + (f" ({analyzer})" if analyzer else ""),
            context=context,
            metrics=self._build_metrics(),
        ))

    def column_complete(self, column: str) -> None:
        """Emit column complete event and count the column as done."""
        self._completed_columns += 1
        self._current_column = None
        context = self._context.with_column(column)

        self._emit(StandardProgressEvent(
            event_type=EventType.COLUMN_COMPLETE,
            level=EventLevel.INFO,
            progress=self._calculate_progress(),
            message=f"Completed column: {column}",
            context=context,
            metrics=self._build_metrics(),
        ))

    def progress(self, progress: float, message: str = "") -> None:
        """Emit generic progress event with a caller-supplied progress value."""
        self._emit(StandardProgressEvent(
            event_type=EventType.PROGRESS,
            level=EventLevel.INFO,
            progress=progress,
            message=message,
            context=self._context,
            metrics=self._build_metrics(),
        ))

    def complete(self, message: str = "Profiling complete") -> None:
        """Emit completion event."""
        self._emit(StandardProgressEvent(
            event_type=EventType.COMPLETE,
            level=EventLevel.NOTICE,
            progress=1.0,
            message=message,
            context=self._context,
            metrics=self._build_metrics(),
        ))

    def fail(self, message: str, error: Exception | None = None) -> None:
        """Emit failure event.

        Args:
            message: Failure description
            error: Exception that caused the failure; its string form is
                attached as ``metadata["error"]``
        """
        # Identity check: an exception type may define __bool__/__len__
        # and be falsy, which must not hide the error.
        metadata = {"error": str(error)} if error is not None else {}

        self._emit(StandardProgressEvent(
            event_type=EventType.FAIL,
            level=EventLevel.ERROR,
            progress=self._calculate_progress(),
            message=message,
            context=self._context,
            metrics=self._build_metrics(),
            metadata=metadata,
        ))

    def checkpoint(self, name: str, **metadata: Any) -> None:
        """Emit checkpoint event carrying the given metadata."""
        self._emit(StandardProgressEvent(
            event_type=EventType.CHECKPOINT,
            level=EventLevel.NOTICE,
            progress=self._calculate_progress(),
            message=f"Checkpoint: {name}",
            context=self._context,
            metrics=self._build_metrics(),
            metadata=metadata,
        ))

    def _emit(self, event: StandardProgressEvent) -> None:
        """Emit event to callback."""
        if self._callback:
            try:
                self._callback.on_progress(event)
            except Exception:
                # Don't let callback errors stop profiling, but leave a
                # trace for anyone debugging a misbehaving callback.
                logging.getLogger(__name__).debug(
                    "Progress callback raised; ignoring", exc_info=True
                )

    def _calculate_progress(self, column_progress: float = 0.0) -> float:
        """Calculate overall progress.

        Args:
            column_progress: Progress within the column currently being
                profiled (0.0 to 1.0)

        Returns:
            Overall progress clamped to the range 0.0 to 1.0; 0.0 when
            the total column count is unknown
        """
        if self._total_columns <= 0:
            return 0.0

        base = self._completed_columns / self._total_columns
        current = column_progress / self._total_columns
        # Clamp on both sides: a negative column_progress must not yield
        # negative overall progress.
        return max(0.0, min(1.0, base + current))

    def _build_metrics(self) -> ProgressMetrics:
        """Build current metrics."""
        elapsed = 0.0
        if self._start_time:
            # Wall-clock based; guard against a clock set backwards.
            elapsed = max(0.0, (datetime.now() - self._start_time).total_seconds())

        rows_per_second = self._rows_processed / elapsed if elapsed > 0 else 0.0

        # Linear extrapolation from the fraction of columns completed.
        progress = self._calculate_progress()
        estimated_remaining = None
        if progress > 0 and elapsed > 0:
            total_estimated = elapsed / progress
            estimated_remaining = max(0.0, total_estimated - elapsed)

        return ProgressMetrics(
            elapsed_seconds=elapsed,
            estimated_remaining_seconds=estimated_remaining,
            rows_processed=self._rows_processed,
            rows_per_second=rows_per_second,
            columns_completed=self._completed_columns,
            columns_total=self._total_columns,
        )
1511
+
1512
+
1513
+ # =============================================================================
1514
+ # Presets
1515
+ # =============================================================================
1516
+
1517
+
1518
class CallbackPresets:
    """Ready-made callback chains for common scenarios."""

    @staticmethod
    def console_only(
        *,
        show_eta: bool = True,
        color: bool = True,
    ) -> CallbackChain:
        """Build a chain that renders to the console only.

        Args:
            show_eta: Show the ETA segment in the progress bar
            color: Enable colored output
        """
        console = ConsoleAdapter(
            style=ConsoleStyle(show_eta=show_eta, color_enabled=color),
        )
        return CallbackChain([console])

    @staticmethod
    def logging_only(
        *,
        logger_name: str = "truthound.progress",
        min_level: EventLevel = EventLevel.INFO,
    ) -> CallbackChain:
        """Build a chain that routes events to a logger only.

        Args:
            logger_name: Logger name to use
            min_level: Minimum event level passed to the adapter
        """
        log_adapter = LoggingAdapter(logger_name=logger_name, min_level=min_level)
        return CallbackChain([log_adapter])

    @staticmethod
    def console_and_logging(
        *,
        logger_name: str = "truthound.progress",
    ) -> CallbackChain:
        """Build a chain with console output followed by logging."""
        members = [
            ConsoleAdapter(),
            LoggingAdapter(logger_name=logger_name),
        ]
        return CallbackChain(members)

    @staticmethod
    def full_observability(
        *,
        log_file: str | Path,
        logger_name: str = "truthound.progress",
    ) -> CallbackChain:
        """Build a chain with console, logging, and file output.

        Args:
            log_file: Path of the event file written by the file adapter
            logger_name: Logger name to use
        """
        members = [
            ConsoleAdapter(),
            LoggingAdapter(logger_name=logger_name),
            FileAdapter(log_file),
        ]
        return CallbackChain(members)

    @staticmethod
    def production(
        *,
        logger_name: str = "truthound.progress",
        max_events_per_second: int = 10,
    ) -> CallbackChain:
        """Build a chain with rate-limited logging.

        Args:
            logger_name: Logger name to use
            max_events_per_second: Rate limit applied to throttled events
        """
        log_adapter = LoggingAdapter(
            logger_name=logger_name,
            min_level=EventLevel.INFO,
        )
        limits = ThrottleConfig(max_events_per_second=max_events_per_second)
        rate_limited = ThrottlingAdapter(wrapped=log_adapter, config=limits)
        return CallbackChain([rate_limited])

    @staticmethod
    def silent() -> CallbackChain:
        """Build an empty chain that produces no output (useful for testing)."""
        return CallbackChain([])
1589
+
1590
+
1591
+ # =============================================================================
1592
+ # Convenience Functions
1593
+ # =============================================================================
1594
+
1595
+
1596
def create_callback_chain(
    *adapters: CallbackAdapter,
    stop_on_error: bool = False,
) -> CallbackChain:
    """Bundle the given adapters into a single callback chain.

    Args:
        *adapters: Adapters to place in the chain, in the order given.
        stop_on_error: Passed through to ``CallbackChain`` as ``stop_on_error``.

    Returns:
        The newly constructed callback chain.
    """
    # CallbackChain receives a list, so unpack the positional tuple into one.
    members = [*adapters]
    return CallbackChain(members, stop_on_error=stop_on_error)
1610
+
1611
+
1612
def create_console_callback(
    *,
    bar_width: int = 40,
    show_eta: bool = True,
    color: bool = True,
) -> ConsoleAdapter:
    """Build a console adapter with the requested styling.

    Args:
        bar_width: Width of the progress bar.
        show_eta: Whether the ETA is shown.
        color: Whether colored output is enabled.

    Returns:
        A console adapter configured with the resulting style.
    """
    return ConsoleAdapter(
        style=ConsoleStyle(
            bar_width=bar_width,
            show_eta=show_eta,
            color_enabled=color,
        ),
    )
1634
+
1635
+
1636
def create_logging_callback(
    logger_name: str = "truthound.progress",
    *,
    min_level: EventLevel = EventLevel.INFO,
) -> LoggingAdapter:
    """Build a logging adapter.

    Args:
        logger_name: Name of the logger handed to the adapter.
        min_level: Minimum event level handed to the adapter.

    Returns:
        The configured logging adapter.
    """
    adapter = LoggingAdapter(
        logger_name=logger_name,
        min_level=min_level,
    )
    return adapter
1651
+
1652
+
1653
def create_file_callback(
    path: str | Path,
    *,
    format: str = "jsonl",
) -> FileAdapter:
    """Build a file adapter that writes to ``path``.

    Args:
        path: Destination file path.
        format: Output format name (jsonl, json), stored in the
            ``FileOutputConfig`` given to the adapter.

    Returns:
        The configured file adapter.
    """
    return FileAdapter(path, config=FileOutputConfig(format=format))
1669
+
1670
+
1671
def with_throttling(
    callback: CallbackAdapter,
    *,
    max_per_second: int = 10,
    min_interval_ms: int = 100,
) -> ThrottlingAdapter:
    """Wrap ``callback`` in a throttling adapter.

    Args:
        callback: Adapter to wrap.
        max_per_second: Forwarded as ``ThrottleConfig.max_events_per_second``.
        min_interval_ms: Forwarded as ``ThrottleConfig.min_interval_ms``.

    Returns:
        A throttling adapter wrapping ``callback``.
    """
    return ThrottlingAdapter(
        wrapped=callback,
        config=ThrottleConfig(
            max_events_per_second=max_per_second,
            min_interval_ms=min_interval_ms,
        ),
    )
1692
+
1693
+
1694
def with_filtering(
    callback: CallbackAdapter,
    *,
    min_level: EventLevel = EventLevel.INFO,
    event_types: set[EventType] | None = None,
) -> FilteringAdapter:
    """Wrap ``callback`` in a filtering adapter.

    Args:
        callback: Adapter to wrap.
        min_level: Minimum level, forwarded to ``FilterConfig``.
        event_types: Event types to pass, forwarded to ``FilterConfig``.

    Returns:
        A filtering adapter wrapping ``callback``.
    """
    rules = FilterConfig(
        min_level=min_level,
        event_types=event_types,
    )
    filtered = FilteringAdapter(wrapped=callback, config=rules)
    return filtered
1712
+
1713
+
1714
def with_buffering(
    callback: CallbackAdapter,
    *,
    max_size: int = 100,
    flush_interval: float = 5.0,
) -> BufferingAdapter:
    """Wrap ``callback`` in a buffering adapter.

    Args:
        callback: Adapter to wrap.
        max_size: Maximum buffer size, forwarded as ``BufferConfig.max_size``.
        flush_interval: Flush interval in seconds, forwarded as
            ``BufferConfig.flush_interval_seconds``.

    Returns:
        A buffering adapter wrapping ``callback``.
    """
    return BufferingAdapter(
        wrapped=callback,
        config=BufferConfig(max_size=max_size, flush_interval_seconds=flush_interval),
    )