truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877) hide show
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,788 @@
1
+ """Adaptive sampling based on data characteristics.
2
+
3
+ This module provides intelligent sampling strategies that adjust
4
+ sample sizes based on data characteristics and time constraints.
5
+
6
+ Sampling strategies:
7
+ - Uniform: Random sampling with fixed ratio
8
+ - Stratified: Preserves distribution of categorical columns
9
+ - Reservoir: Streaming-friendly sampling for large datasets
10
+ - Adaptive: Automatically adjusts based on time budget
11
+
12
+ Example:
13
+ from truthound.validators.timeout.advanced.sampling import (
14
+ AdaptiveSampler,
15
+ calculate_sample_size,
16
+ )
17
+
18
+ sampler = AdaptiveSampler()
19
+
20
+ # Calculate optimal sample size for time budget
21
+ sample_size = sampler.calculate_size(
22
+ total_rows=1_000_000,
23
+ time_budget_seconds=10.0,
24
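+ # The minimum sample size is taken from SamplingConfig.min_sample_size; calculate_size() has no min_sample argument.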
25
+ )
26
+
27
+ # Sample data
28
+ result = sampler.sample(data, sample_size)
29
+ """
30
+
31
+ from __future__ import annotations
32
+
33
+ import math
34
+ import random
35
+ import statistics
36
+ import threading
37
+ from abc import ABC, abstractmethod
38
+ from dataclasses import dataclass, field
39
+ from datetime import datetime, timezone
40
+ from enum import Enum
41
+ from typing import Any, Generic, Sequence, TypeVar
42
+
43
+ T = TypeVar("T")
44
+
45
+
46
class SamplingMethod(str, Enum):
    """Identifiers for the sampling approaches used in this module.

    The enum also derives from ``str``, so every member compares equal to
    its plain string value (``SamplingMethod.UNIFORM == "uniform"``) and
    can be looked up from that string (``SamplingMethod("uniform")``).
    """

    # Equal-probability random selection.
    UNIFORM = "uniform"
    # Selection performed per stratum (group) of the data.
    STRATIFIED = "stratified"
    # Fixed-size reservoir maintained while scanning the data.
    RESERVOIR = "reservoir"
    # Every k-th item after a random starting offset.
    SYSTEMATIC = "systematic"
    # Strategy chosen automatically by the adaptive sampler.
    ADAPTIVE = "adaptive"
54
+
55
+
56
@dataclass
class DataCharacteristics:
    """Characteristics of a dataset.

    Attributes:
        row_count: Number of rows
        column_count: Number of columns
        estimated_bytes: Estimated size in bytes
        null_ratio: Ratio of null values
        unique_ratio: Average ratio of unique values per column
        has_categorical: Whether dataset has categorical columns
        categorical_columns: List of categorical column names
        estimated_processing_time_per_row_ms: Estimated ms per row
    """

    row_count: int
    column_count: int = 0
    estimated_bytes: int = 0
    null_ratio: float = 0.0
    unique_ratio: float = 0.5
    has_categorical: bool = False
    categorical_columns: list[str] = field(default_factory=list)
    estimated_processing_time_per_row_ms: float = 0.01

    @classmethod
    def from_data(cls, data: Any) -> "DataCharacteristics":
        """Infer characteristics from data.

        Only ``row_count`` and ``column_count`` are inferred; every other
        field keeps its dataclass default.

        The row count comes from ``len(data)`` when that works. If the
        object is unsized (no ``__len__``, or a ``__len__`` that raises
        ``TypeError``), the first entry of ``data.shape`` is used instead,
        and ``0`` when there is no usable shape either.

        The column count comes from ``data.shape[1]`` when the shape has at
        least two entries, otherwise from ``len(data.columns)`` when a
        ``columns`` attribute exists, otherwise ``0``.

        Args:
            data: Dataset (list, DataFrame, etc.)

        Returns:
            DataCharacteristics
        """
        shape = getattr(data, "shape", None)

        # EAFP: merely having a ``__len__`` attribute does not guarantee
        # that len() succeeds, so try it and fall back to the shape.
        try:
            row_count = len(data)
        except TypeError:
            if shape is not None and len(shape) > 0:
                row_count = shape[0]
            else:
                row_count = 0

        column_count = 0
        if shape is not None and len(shape) > 1:
            column_count = shape[1]
        elif hasattr(data, "columns"):
            column_count = len(data.columns)

        return cls(
            row_count=row_count,
            column_count=column_count,
        )
109
+
110
+
111
@dataclass
class SamplingResult(Generic[T]):
    """Outcome of one sampling call.

    Attributes:
        data: The sampled data
        original_size: Size of the dataset that was sampled from
        sample_size: Number of items actually drawn
        sampling_ratio: Fraction of the original that was drawn
        method: Which sampling method produced the result
        indices: Positions of the drawn items, when known
        metadata: Free-form extra information
        timestamp: Creation time (UTC, timezone-aware) of this result
    """

    data: T
    original_size: int
    sample_size: int
    sampling_ratio: float
    method: SamplingMethod
    indices: list[int] | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))

    @property
    def is_full(self) -> bool:
        """Whether the sample covers the whole dataset (nothing left out)."""
        return self.original_size <= self.sample_size

    @property
    def confidence_multiplier(self) -> float:
        """Factor for scaling confidence according to the sampling ratio.

        Returns:
            1.0 for a full sample, otherwise the square root of the
            sampling ratio, capped at 1.0
        """
        if not self.is_full:
            # A smaller share of the data yields a smaller multiplier.
            return min(1.0, math.sqrt(self.sampling_ratio))
        return 1.0

    def to_dict(self) -> dict[str, Any]:
        """Summarise the result as a dictionary.

        The sampled data, the indices and the timestamp are not included.
        """
        return dict(
            original_size=self.original_size,
            sample_size=self.sample_size,
            sampling_ratio=self.sampling_ratio,
            method=self.method.value,
            is_full=self.is_full,
            confidence_multiplier=self.confidence_multiplier,
            metadata=self.metadata,
        )
162
+
163
+
164
class SamplingStrategy(ABC):
    """Abstract interface that every sampling strategy implements."""

    @property
    @abstractmethod
    def name(self) -> str:
        """Short name identifying the strategy."""

    @abstractmethod
    def sample(
        self,
        data: Sequence[T],
        sample_size: int,
    ) -> SamplingResult[list[T]]:
        """Draw a sample from ``data``.

        Args:
            data: Sequence to draw from
            sample_size: Number of items requested

        Returns:
            SamplingResult holding the drawn items
        """
189
+
190
+
191
class UniformSampling(SamplingStrategy):
    """Simple random sampling without replacement.

    Every item has the same chance of being selected.
    """

    def __init__(self, seed: int | None = None):
        """Create the strategy.

        Args:
            seed: Seed for the private random generator (for reproducibility)
        """
        self.seed = seed
        self._rng = random.Random(seed)

    @property
    def name(self) -> str:
        return "uniform"

    def sample(
        self,
        data: Sequence[T],
        sample_size: int,
    ) -> SamplingResult[list[T]]:
        """Draw ``sample_size`` items uniformly at random.

        When ``sample_size`` is at least ``len(data)`` the whole input is
        returned with a ratio of 1.0. Otherwise the drawn items are
        returned in their original relative order.
        """
        total = len(data)

        if sample_size >= total:
            # Nothing to draw: hand back everything.
            picked = list(data)
            chosen = list(range(total))
            size, ratio = total, 1.0
        else:
            # Sorting the drawn positions keeps the original ordering.
            chosen = sorted(self._rng.sample(range(total), sample_size))
            picked = [data[pos] for pos in chosen]
            size, ratio = sample_size, sample_size / total

        return SamplingResult(
            data=picked,
            original_size=total,
            sample_size=size,
            sampling_ratio=ratio,
            method=SamplingMethod.UNIFORM,
            indices=chosen,
        )
239
+
240
+
241
class StratifiedSampling(SamplingStrategy):
    """Sampling performed per stratum, where strata come from a key function.

    Each stratum contributes roughly in proportion to its size, with at
    least one item per stratum before any final trimming.
    """

    def __init__(
        self,
        key_fn: Any | None = None,
        seed: int | None = None,
    ):
        """Create the strategy.

        Args:
            key_fn: Callable mapping an item to its stratum key; when
                omitted, the item itself is the key
            seed: Seed for the private random generator
        """
        if not key_fn:
            key_fn = lambda x: x  # noqa: E731 - identity key
        self.key_fn = key_fn
        self.seed = seed
        self._rng = random.Random(seed)

    @property
    def name(self) -> str:
        return "stratified"

    def sample(
        self,
        data: Sequence[T],
        sample_size: int,
    ) -> SamplingResult[list[T]]:
        """Draw a stratified sample.

        The result may contain fewer than ``sample_size`` items because
        per-stratum quotas are rounded down; it never contains more.
        """
        total = len(data)
        if sample_size >= total:
            return SamplingResult(
                data=list(data),
                original_size=total,
                sample_size=total,
                sampling_ratio=1.0,
                method=SamplingMethod.STRATIFIED,
                indices=list(range(total)),
            )

        # Bucket item positions by their stratum key.
        groups: dict[Any, list[int]] = {}
        for pos, item in enumerate(data):
            groups.setdefault(self.key_fn(item), []).append(pos)

        # Proportional quota per stratum: at least one, at most the
        # stratum's own size.
        fraction = sample_size / total
        chosen: list[int] = []
        for members in groups.values():
            member_count = len(members)
            quota = min(max(1, int(member_count * fraction)), member_count)
            chosen.extend(self._rng.sample(members, quota))

        # The one-per-stratum floor can overshoot; cut back at random.
        if len(chosen) > sample_size:
            chosen = self._rng.sample(chosen, sample_size)

        chosen.sort()
        picked = [data[pos] for pos in chosen]
        drawn = len(picked)

        return SamplingResult(
            data=picked,
            original_size=total,
            sample_size=drawn,
            sampling_ratio=drawn / total,
            method=SamplingMethod.STRATIFIED,
            indices=chosen,
            metadata={"strata_count": len(groups)},
        )
319
+
320
+
321
class ReservoirSampling(SamplingStrategy):
    """Reservoir sampling for streaming data.

    Maintains a fixed-size sample as data streams in.
    Uses Algorithm R for uniform random sampling.
    """

    def __init__(self, seed: int | None = None):
        """Initialize reservoir sampling.

        Args:
            seed: Random seed
        """
        self.seed = seed
        self._rng = random.Random(seed)

    @property
    def name(self) -> str:
        return "reservoir"

    def sample(
        self,
        data: Sequence[T],
        sample_size: int,
    ) -> SamplingResult[list[T]]:
        """Sample using reservoir algorithm.

        Args:
            data: Data to sample
            sample_size: Desired sample size

        Returns:
            SamplingResult whose ``data`` and ``indices`` are aligned
            (``data[k]`` is the item found at position ``indices[k]`` of
            the input) and ordered by original position, as in the other
            strategies in this module.
        """
        n = len(data)
        if sample_size >= n:
            return SamplingResult(
                data=list(data),
                original_size=n,
                sample_size=n,
                sampling_ratio=1.0,
                method=SamplingMethod.RESERVOIR,
                indices=list(range(n)),
            )

        # Algorithm R, run on positions only: the reservoir starts with the
        # first ``sample_size`` positions, and position i replaces a random
        # slot with probability sample_size / (i + 1).
        indices = list(range(sample_size))
        for i in range(sample_size, n):
            j = self._rng.randint(0, i)
            if j < sample_size:
                indices[j] = i

        # Build the items from the *sorted* positions so that data and
        # indices stay in step; sorting only the indices (and returning the
        # reservoir in slot order) would break that correspondence.
        indices.sort()
        sampled = [data[i] for i in indices]

        return SamplingResult(
            data=sampled,
            original_size=n,
            sample_size=sample_size,
            sampling_ratio=sample_size / n,
            method=SamplingMethod.RESERVOIR,
            indices=indices,
        )

    def create_stream_sampler(self, sample_size: int) -> "StreamReservoir[T]":
        """Create a streaming reservoir sampler.

        The new sampler is seeded with this strategy's ``seed``.

        Args:
            sample_size: Size of reservoir

        Returns:
            StreamReservoir for incremental sampling
        """
        return StreamReservoir(sample_size, self.seed)
387
+
388
+
389
class StreamReservoir(Generic[T]):
    """Incrementally maintained fixed-size random sample.

    Intended for data that arrives one item at a time, where only a
    bounded sample is to be kept.

    Example:
        reservoir = StreamReservoir(100)
        for item in streaming_data():
            reservoir.add(item)
        sample = reservoir.get_sample()
    """

    def __init__(self, sample_size: int, seed: int | None = None):
        """Set up an empty reservoir.

        Args:
            sample_size: Capacity of the reservoir
            seed: Seed for the private random generator
        """
        self.sample_size = sample_size
        self._reservoir: list[T] = []
        self._count = 0
        self._rng = random.Random(seed)

    def add(self, item: T) -> None:
        """Offer one item to the reservoir.

        While the reservoir has spare capacity the item is simply stored.
        Afterwards a random slot number in ``[0, items_seen - 1]`` is drawn
        and the item overwrites that slot only if the number falls inside
        the reservoir.

        Args:
            item: Item being offered
        """
        self._count += 1

        if len(self._reservoir) >= self.sample_size:
            slot = self._rng.randint(0, self._count - 1)
            if slot < self.sample_size:
                self._reservoir[slot] = item
            return

        self._reservoir.append(item)

    def get_sample(self) -> list[T]:
        """Return a copy of the items currently held.

        Returns:
            New list containing the sampled items
        """
        return self._reservoir.copy()

    @property
    def total_seen(self) -> int:
        """Number of items offered so far."""
        return self._count

    @property
    def sampling_ratio(self) -> float:
        """Held items divided by items seen, capped at 1.0 (1.0 if none seen)."""
        if not self._count:
            return 1.0
        return min(1.0, len(self._reservoir) / self._count)
448
+
449
+
450
class SystematicSampling(SamplingStrategy):
    """Systematic sampling with fixed interval.

    Selects every k-th item after a random start.
    """

    def __init__(self, seed: int | None = None):
        """Initialize systematic sampling.

        Args:
            seed: Random seed for start position
        """
        self.seed = seed
        self._rng = random.Random(seed)

    @property
    def name(self) -> str:
        return "systematic"

    def sample(
        self,
        data: Sequence[T],
        sample_size: int,
    ) -> SamplingResult[list[T]]:
        """Sample systematically.

        Args:
            data: Data to sample
            sample_size: Desired sample size

        Returns:
            SamplingResult with the selected items in original order. The
            whole input is returned when ``sample_size >= len(data)``, and
            an empty sample when ``sample_size`` is zero or negative.
        """
        n = len(data)
        if sample_size >= n:
            return SamplingResult(
                data=list(data),
                original_size=n,
                sample_size=n,
                sampling_ratio=1.0,
                method=SamplingMethod.SYSTEMATIC,
                indices=list(range(n)),
            )

        # A request for nothing yields an empty sample; without this guard
        # the interval computation below divides by zero.
        if sample_size <= 0:
            return SamplingResult(
                data=[],
                original_size=n,
                sample_size=0,
                sampling_ratio=0.0,
                method=SamplingMethod.SYSTEMATIC,
                indices=[],
            )

        # Calculate interval (a float greater than 1, since sample_size < n)
        interval = n / sample_size
        # Random offset inside the first interval
        start = self._rng.random() * interval

        indices = []
        position = start
        while position < n and len(indices) < sample_size:
            indices.append(int(position))
            position += interval

        sampled = [data[i] for i in indices]

        return SamplingResult(
            data=sampled,
            original_size=n,
            sample_size=len(sampled),
            sampling_ratio=len(sampled) / n,
            method=SamplingMethod.SYSTEMATIC,
            indices=indices,
            metadata={"interval": interval, "start": start},
        )
507
+
508
+
509
@dataclass
class SamplingConfig:
    """Tunable settings for adaptive sampling.

    Attributes:
        min_sample_size: Lower bound for a computed sample size
        max_sample_size: Upper bound for a computed sample size
        target_confidence: Desired confidence level
        time_weight: Weight for the time-versus-accuracy trade-off
        prefer_stratified: Whether stratified sampling is preferred when
            it is possible
    """

    # Size limits applied to computed sample sizes.
    min_sample_size: int = 100
    max_sample_size: int = 100000

    # Statistical target, e.g. 0.95 for 95 %.
    target_confidence: float = 0.95

    # Trade-off and strategy preferences.
    time_weight: float = 0.5
    prefer_stratified: bool = True
526
+
527
+
528
class AdaptiveSampler:
    """Adaptive sampler that adjusts strategy based on conditions.

    This sampler automatically selects the best sampling strategy
    and sample size based on:
    - Time budget available
    - Data characteristics
    - Target confidence level

    Example:
        sampler = AdaptiveSampler()

        # Calculate optimal sample size
        size = sampler.calculate_size(
            total_rows=1_000_000,
            time_budget_seconds=5.0,
        )

        # Sample data
        result = sampler.sample(data, size)
    """

    # Number of most recent (size, time_ms) measurements kept by sample().
    _HISTORY_LIMIT = 100

    def __init__(
        self,
        config: SamplingConfig | None = None,
        strategies: dict[SamplingMethod, SamplingStrategy] | None = None,
    ):
        """Initialize adaptive sampler.

        Args:
            config: Sampling configuration; a default ``SamplingConfig`` is
                used when omitted.
            strategies: Mapping from sampling method to the strategy object
                implementing it; the four built-in strategies are used when
                omitted (or when an empty mapping is given).
        """
        self.config = config or SamplingConfig()
        self.strategies = strategies or {
            SamplingMethod.UNIFORM: UniformSampling(),
            SamplingMethod.STRATIFIED: StratifiedSampling(),
            SamplingMethod.RESERVOIR: ReservoirSampling(),
            SamplingMethod.SYSTEMATIC: SystematicSampling(),
        }
        self._execution_history: list[tuple[int, float]] = []  # (size, time_ms)
        self._lock = threading.Lock()

    def calculate_size(
        self,
        total_rows: int,
        time_budget_seconds: float | None = None,
        characteristics: DataCharacteristics | None = None,
    ) -> int:
        """Calculate optimal sample size.

        The size starts at ``total_rows`` clamped to the configured
        min/max, is reduced to what fits in the time budget (only when both
        ``time_budget_seconds`` and ``characteristics`` are given), and is
        then raised back to the Cochran minimum for the target confidence.
        The result never exceeds ``total_rows`` or ``max_sample_size``.

        Note that the confidence floor and ``min_sample_size`` take
        precedence over the time budget.

        Args:
            total_rows: Total number of rows
            time_budget_seconds: Available time budget
            characteristics: Data characteristics

        Returns:
            Recommended sample size (0 when ``total_rows`` is not positive)
        """
        import math

        # Nothing to sample; also prevents a negative size leaking out.
        if total_rows <= 0:
            return 0

        config = self.config

        # Start with the full dataset, clamped to the configured bounds.
        sample_size = min(
            config.max_sample_size,
            max(config.min_sample_size, total_rows),
        )

        # Adjust for time budget
        if time_budget_seconds is not None and characteristics is not None:
            estimated_time_per_row_ms = characteristics.estimated_processing_time_per_row_ms
            available_ms = time_budget_seconds * 1000

            # How many rows can we process?  The 0.001 floor avoids a
            # division by zero for a zero/negative per-row estimate.
            max_processable = int(available_ms / max(estimated_time_per_row_ms, 0.001))
            sample_size = min(sample_size, max_processable)

        # Adjust for confidence using Cochran's formula for sample size.
        # Confidence levels below 0.90 fall back to the 95% z-score.
        z = 1.96  # 95% confidence
        if config.target_confidence >= 0.99:
            z = 2.576
        elif config.target_confidence >= 0.95:
            z = 1.96
        elif config.target_confidence >= 0.90:
            z = 1.645

        # Standard sample size formula (assuming p=0.5 for maximum variability)
        p = 0.5
        e = 0.05  # 5% margin of error
        # Round up: truncating would give a sample slightly too small to
        # achieve the requested margin of error (384.16 -> 385, not 384).
        min_for_confidence = math.ceil((z ** 2 * p * (1 - p)) / (e ** 2))

        sample_size = max(sample_size, min(min_for_confidence, total_rows))

        # Final bounds check
        sample_size = max(config.min_sample_size, sample_size)
        sample_size = min(config.max_sample_size, min(sample_size, total_rows))

        return sample_size

    def select_strategy(
        self,
        characteristics: DataCharacteristics | None = None,
        sample_size: int | None = None,
    ) -> SamplingMethod:
        """Select best sampling strategy.

        Args:
            characteristics: Data characteristics
            sample_size: Target sample size (accepted for interface
                compatibility; not currently used in the decision)

        Returns:
            Recommended sampling method
        """
        # Default to uniform when nothing is known about the data
        if characteristics is None:
            return SamplingMethod.UNIFORM

        # Use stratified for categorical data
        if characteristics.has_categorical and self.config.prefer_stratified:
            return SamplingMethod.STRATIFIED

        # Use reservoir for very large datasets
        if characteristics.row_count > 10_000_000:
            return SamplingMethod.RESERVOIR

        # Use systematic for large datasets
        if characteristics.row_count > 1_000_000:
            return SamplingMethod.SYSTEMATIC

        return SamplingMethod.UNIFORM

    def sample(
        self,
        data: Sequence[T],
        sample_size: int | None = None,
        method: SamplingMethod | None = None,
        characteristics: DataCharacteristics | None = None,
    ) -> SamplingResult[list[T]]:
        """Sample data.

        Each call also records ``(result.sample_size, elapsed_ms)`` in the
        execution history used by ``get_estimated_time_per_row``.

        Args:
            data: Data to sample
            sample_size: Desired sample size (None = auto)
            method: Sampling method (None = auto)
            characteristics: Data characteristics

        Returns:
            SamplingResult
        """
        import time

        total_rows = len(data)

        # Auto-calculate sample size
        if sample_size is None:
            if characteristics is None:
                characteristics = DataCharacteristics.from_data(data)
            sample_size = self.calculate_size(total_rows, characteristics=characteristics)

        # Auto-select method
        if method is None:
            method = self.select_strategy(characteristics, sample_size)

        # Get strategy; only build the uniform fallback when it is needed.
        strategy = self.strategies.get(method)
        if strategy is None:
            strategy = UniformSampling()

        # Sample, timing with a monotonic high-resolution clock so that
        # system clock adjustments cannot produce bogus durations.
        start = time.perf_counter()
        result = strategy.sample(data, sample_size)
        elapsed_ms = (time.perf_counter() - start) * 1000

        # Record for learning
        with self._lock:
            self._execution_history.append((result.sample_size, elapsed_ms))
            # Keep only the most recent measurements
            if len(self._execution_history) > self._HISTORY_LIMIT:
                del self._execution_history[:-self._HISTORY_LIMIT]

        return result

    def get_estimated_time_per_row(self) -> float:
        """Get estimated time per row from recorded sampling runs.

        The estimate is the total recorded time divided by the total
        number of sampled rows across the retained history.

        Returns:
            Estimated milliseconds per row (0.01 when no usable history
            exists)
        """
        with self._lock:
            if not self._execution_history:
                return 0.01  # Default

            # Ratio of totals (a size-weighted average), not a regression.
            total_rows = sum(h[0] for h in self._execution_history)
            total_time = sum(h[1] for h in self._execution_history)

            if total_rows == 0:
                return 0.01

            return total_time / total_rows
726
+
727
+
728
# Module-level sampler, created lazily and shared by the convenience
# functions in this module.
_default_sampler: AdaptiveSampler | None = None


def calculate_sample_size(
    total_rows: int,
    time_budget_seconds: float | None = None,
    min_sample: int = 100,
    max_sample: int = 100000,
) -> int:
    """Calculate optimal sample size.

    ``min_sample`` and ``max_sample`` are honoured on every call. When they
    differ from the bounds of the shared module-level sampler, a dedicated
    sampler is used for this calculation so the request is neither ignored
    nor leaked into later calls that rely on the shared sampler.

    Args:
        total_rows: Total number of rows
        time_budget_seconds: Available time budget
        min_sample: Minimum sample size
        max_sample: Maximum sample size

    Returns:
        Recommended sample size
    """
    global _default_sampler
    if _default_sampler is None:
        # The shared sampler always carries the default configuration;
        # caller-specific bounds must not become sticky module state.
        _default_sampler = AdaptiveSampler()

    sampler = _default_sampler
    shared_config = sampler.config
    if (
        shared_config.min_sample_size != min_sample
        or shared_config.max_sample_size != max_sample
    ):
        # Size calculation does not depend on execution history, so a
        # throwaway sampler with the requested bounds is equivalent.
        sampler = AdaptiveSampler(SamplingConfig(
            min_sample_size=min_sample,
            max_sample_size=max_sample,
        ))

    characteristics = DataCharacteristics(row_count=total_rows)
    return sampler.calculate_size(
        total_rows,
        time_budget_seconds,
        characteristics,
    )
762
+
763
+
764
def auto_sample(
    data: Sequence[T],
    time_budget_seconds: float | None = None,
) -> SamplingResult[list[T]]:
    """Sample ``data`` using the shared module-level sampler.

    The data is profiled once; the resulting characteristics drive both
    the sample-size calculation and the sampling call itself.

    Args:
        data: Data to sample
        time_budget_seconds: Available time budget

    Returns:
        SamplingResult
    """
    global _default_sampler

    # Create the shared sampler on first use.
    sampler = _default_sampler
    if sampler is None:
        sampler = AdaptiveSampler()
        _default_sampler = sampler

    profile = DataCharacteristics.from_data(data)
    size = sampler.calculate_size(len(data), time_budget_seconds, profile)
    return sampler.sample(data, size, characteristics=profile)