truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877) hide show
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,801 @@
1
+ """Elasticsearch data source implementation.
2
+
3
+ This module provides async Elasticsearch data source with official
4
+ elasticsearch-py async client support.
5
+ """
6
+
7
+ from __future__ import annotations
8
+
9
+ from dataclasses import dataclass, field
10
+ from typing import TYPE_CHECKING, Any, Callable
11
+
12
+ from truthound.datasources._protocols import ColumnType, DataSourceCapability
13
+ from truthound.datasources.nosql.base import (
14
+ BaseNoSQLDataSource,
15
+ NoSQLDataSourceConfig,
16
+ NoSQLDataSourceError,
17
+ )
18
+
19
+ if TYPE_CHECKING:
20
+ import polars as pl
21
+
22
+
23
+ # =============================================================================
24
+ # Exceptions
25
+ # =============================================================================
26
+
27
+
28
class ElasticsearchError(NoSQLDataSourceError):
    """Base class for errors raised by the Elasticsearch data source."""
32
+
33
+
34
class ElasticsearchConnectionError(ElasticsearchError):
    """Raised when a connection to Elasticsearch cannot be established.

    Attributes:
        hosts: The host list supplied by the raiser, or ``None`` if omitted.
    """

    def __init__(self, message: str, hosts: list[str] | None = None) -> None:
        # The prefix is part of the public error text; keep it verbatim.
        detail = f"Elasticsearch connection failed: {message}"
        self.hosts = hosts
        super().__init__(detail)
40
+
41
+
42
+ # =============================================================================
43
+ # Configuration
44
+ # =============================================================================
45
+
46
+
47
@dataclass
class ElasticsearchConfig(NoSQLDataSourceConfig):
    """Settings for :class:`ElasticsearchDataSource`.

    A cluster is addressed either through ``cloud_id`` or through ``hosts``;
    when ``cloud_id`` is set the host list is not passed to the client.
    Credentials are chosen in this order: ``api_key``, then
    ``username`` + ``password`` (both must be set), then ``bearer_token``.

    Attributes:
        hosts: Elasticsearch node URLs (e.g., ["http://localhost:9200"]).
        index: Index name or pattern (wildcards such as "logs-*" allowed).
            Must be non-empty to construct a data source.
        cloud_id: Elastic Cloud ID for cloud deployments.
        api_key: API key, either a string or an ``(id, key)`` tuple.
        username: Basic auth username.
        password: Basic auth password.
        bearer_token: Bearer token for authentication.
        ca_certs: Path to a CA certificate file.
        verify_certs: Whether to verify SSL certificates.
        scroll_timeout: Value passed as ``scroll`` when a scroll search
            is started.
        scroll_size: Page size (``size``) used for scroll searches.
        request_timeout: Client request timeout in seconds.
        track_total_hits: Forwarded to searches as ``track_total_hits``.
    """

    # --- Cluster location -------------------------------------------------
    hosts: list[str] = field(default_factory=lambda: ["http://localhost:9200"])
    index: str = ""
    cloud_id: str | None = None

    # --- Authentication ---------------------------------------------------
    api_key: str | tuple[str, str] | None = None
    username: str | None = None
    password: str | None = None
    bearer_token: str | None = None

    # --- TLS --------------------------------------------------------------
    ca_certs: str | None = None
    verify_certs: bool = True

    # --- Search / scroll behaviour ----------------------------------------
    scroll_timeout: str = "5m"
    scroll_size: int = 1000
    request_timeout: int = 30
    track_total_hits: bool = True
82
+
83
+
84
+ # =============================================================================
85
+ # Elasticsearch Data Source
86
+ # =============================================================================
87
+
88
+
89
+ class ElasticsearchDataSource(BaseNoSQLDataSource):
90
+ """Async Elasticsearch data source.
91
+
92
+ Provides async access to Elasticsearch indices with automatic schema
93
+ inference from index mappings and Polars LazyFrame conversion.
94
+
95
+ Example:
96
+ >>> # Basic usage
97
+ >>> source = ElasticsearchDataSource(ElasticsearchConfig(
98
+ ... hosts=["http://localhost:9200"],
99
+ ... index="my-index",
100
+ ... ))
101
+ >>>
102
+ >>> async with source:
103
+ ... schema = await source.get_schema_async()
104
+ ... lf = await source.to_polars_lazyframe_async()
105
+
106
+ >>> # Elastic Cloud
107
+ >>> source = ElasticsearchDataSource.from_cloud(
108
+ ... cloud_id="my-deployment:dXMtY2VudHJhbC0x...",
109
+ ... api_key="your-api-key",
110
+ ... index="logs-*",
111
+ ... )
112
+ """
113
+
114
+ source_type = "elasticsearch"
115
+
116
+ # Elasticsearch type to ColumnType mapping
117
+ ES_TYPE_MAPPING = {
118
+ # Core types
119
+ "text": ColumnType.STRING,
120
+ "keyword": ColumnType.STRING,
121
+ "long": ColumnType.INTEGER,
122
+ "integer": ColumnType.INTEGER,
123
+ "short": ColumnType.INTEGER,
124
+ "byte": ColumnType.INTEGER,
125
+ "double": ColumnType.FLOAT,
126
+ "float": ColumnType.FLOAT,
127
+ "half_float": ColumnType.FLOAT,
128
+ "scaled_float": ColumnType.FLOAT,
129
+ "boolean": ColumnType.BOOLEAN,
130
+ "date": ColumnType.DATETIME,
131
+ "binary": ColumnType.BINARY,
132
+ # Numeric
133
+ "unsigned_long": ColumnType.INTEGER,
134
+ # Complex types
135
+ "object": ColumnType.STRUCT,
136
+ "nested": ColumnType.STRUCT,
137
+ "flattened": ColumnType.STRUCT,
138
+ # IP and geo
139
+ "ip": ColumnType.STRING,
140
+ "geo_point": ColumnType.STRUCT,
141
+ "geo_shape": ColumnType.STRUCT,
142
+ # Range types
143
+ "integer_range": ColumnType.STRUCT,
144
+ "float_range": ColumnType.STRUCT,
145
+ "long_range": ColumnType.STRUCT,
146
+ "double_range": ColumnType.STRUCT,
147
+ "date_range": ColumnType.STRUCT,
148
+ "ip_range": ColumnType.STRUCT,
149
+ # Other
150
+ "completion": ColumnType.STRING,
151
+ "search_as_you_type": ColumnType.STRING,
152
+ "alias": ColumnType.UNKNOWN,
153
+ "dense_vector": ColumnType.LIST,
154
+ "sparse_vector": ColumnType.STRUCT,
155
+ }
156
+
157
def __init__(self, config: ElasticsearchConfig) -> None:
    """Create the data source from its configuration.

    Args:
        config: Elasticsearch configuration; ``config.index`` must be
            non-empty.

    Raises:
        ElasticsearchError: If ``config.index`` is empty.
    """
    index_missing = not config.index
    if index_missing:
        raise ElasticsearchError("Index name is required")

    super().__init__(config)

    # No client yet; connect_async() creates it.
    self._client: Any = None
171
+
172
@property
def config(self) -> ElasticsearchConfig:
    """Return the stored configuration, typed as ElasticsearchConfig."""
    typed_config = self._config
    return typed_config  # type: ignore
176
+
177
@property
def name(self) -> str:
    """Return the configured name, or ``es://<index>`` when none is set."""
    explicit_name = self._config.name
    return explicit_name if explicit_name else f"es://{self.config.index}"
183
+
184
@property
def index(self) -> str:
    """Return the configured index name or pattern."""
    settings = self.config
    return settings.index
188
+
189
@property
def capabilities(self) -> set[DataSourceCapability]:
    """Return the set of capabilities this data source advertises."""
    supported = (
        DataSourceCapability.SCHEMA_INFERENCE,
        DataSourceCapability.SAMPLING,
        DataSourceCapability.STREAMING,
        DataSourceCapability.ROW_COUNT,
    )
    return set(supported)
198
+
199
+ # -------------------------------------------------------------------------
200
+ # Factory Methods
201
+ # -------------------------------------------------------------------------
202
+
203
@classmethod
def from_cloud(
    cls,
    cloud_id: str,
    api_key: str | tuple[str, str],
    index: str,
    **kwargs: Any,
) -> "ElasticsearchDataSource":
    """Create a data source for an Elastic Cloud deployment.

    Args:
        cloud_id: Elastic Cloud deployment ID.
        api_key: API key (string or ``(id, key)`` tuple).
        index: Index name or pattern.
        **kwargs: Additional ``ElasticsearchConfig`` fields. ``verify_certs``
            defaults to ``True`` but may be overridden here.

    Returns:
        ElasticsearchDataSource instance.

    Raises:
        ElasticsearchError: If ``index`` is empty.

    Example:
        >>> source = ElasticsearchDataSource.from_cloud(
        ...     cloud_id="my-deployment:base64string",
        ...     api_key="my-api-key",
        ...     index="logs-*",
        ... )
    """
    # Passing verify_certs=True next to **kwargs raised
    # "got multiple values for keyword argument" whenever the caller
    # supplied verify_certs; treat True as a default instead.
    kwargs.setdefault("verify_certs", True)

    config = ElasticsearchConfig(
        cloud_id=cloud_id,
        api_key=api_key,
        index=index,
        **kwargs,
    )
    return cls(config)
237
+
238
@classmethod
def from_hosts(
    cls,
    hosts: list[str],
    index: str,
    username: str | None = None,
    password: str | None = None,
    **kwargs: Any,
) -> "ElasticsearchDataSource":
    """Build a data source that talks to an explicit list of hosts.

    Args:
        hosts: Elasticsearch host URLs.
        index: Index name or pattern.
        username: Basic auth username, if any.
        password: Basic auth password, if any.
        **kwargs: Additional ``ElasticsearchConfig`` fields.

    Returns:
        ElasticsearchDataSource instance.

    Example:
        >>> source = ElasticsearchDataSource.from_hosts(
        ...     hosts=["http://node1:9200", "http://node2:9200"],
        ...     index="my-index",
        ...     username="elastic",
        ...     password="changeme",
        ... )
    """
    settings = ElasticsearchConfig(
        hosts=hosts,
        index=index,
        username=username,
        password=password,
        **kwargs,
    )
    source = cls(settings)
    return source
275
+
276
+ # -------------------------------------------------------------------------
277
+ # Connection Management
278
+ # -------------------------------------------------------------------------
279
+
280
async def _create_connection_factory(self) -> Callable:
    """Return an async factory that builds a configured client.

    The returned coroutine function reads ``self.config`` when it is
    called (not when this method is called) and returns a new
    ``AsyncElasticsearch`` instance. It does not contact the cluster.

    Returns:
        Zero-argument coroutine function producing a client.
    """

    async def factory():
        try:
            from elasticsearch import AsyncElasticsearch
        except ImportError as exc:
            # Chain the cause so the underlying import failure stays visible.
            raise ImportError(
                "elasticsearch[async] is required for Elasticsearch support. "
                "Install it with: pip install elasticsearch[async]"
            ) from exc

        cfg = self.config
        client_kwargs: dict[str, Any] = {
            "request_timeout": cfg.request_timeout,
        }

        # A cloud ID replaces the host list entirely.
        if cfg.cloud_id:
            client_kwargs["cloud_id"] = cfg.cloud_id
        else:
            client_kwargs["hosts"] = cfg.hosts

        # Only one credential type is sent; api_key wins, then basic auth
        # (both parts required), then bearer token.
        if cfg.api_key:
            client_kwargs["api_key"] = cfg.api_key
        elif cfg.username and cfg.password:
            client_kwargs["basic_auth"] = (cfg.username, cfg.password)
        elif cfg.bearer_token:
            client_kwargs["bearer_auth"] = cfg.bearer_token

        if cfg.ca_certs:
            client_kwargs["ca_certs"] = cfg.ca_certs
        # Always forwarded so verify_certs=False is honoured even when no
        # CA bundle is configured.
        client_kwargs["verify_certs"] = cfg.verify_certs

        return AsyncElasticsearch(**client_kwargs)

    return factory
318
+
319
async def connect_async(self) -> None:
    """Establish the connection to Elasticsearch and pre-fetch the schema.

    Does nothing when already connected. Otherwise builds an
    ``AsyncElasticsearch`` client from the configuration, checks it with
    ``info()``, and stores the schema in ``self._cached_schema``.

    Raises:
        ImportError: If the ``elasticsearch`` package is not installed.
        ElasticsearchConnectionError: If creating the client, the ``info()``
            call, or the schema pre-fetch fails. The original exception is
            attached as ``__cause__``.
    """
    if self._is_connected:
        return

    async with self._lock:
        # Re-check under the lock: another task may have connected while
        # this one was waiting.
        if self._is_connected:
            return

        try:
            from elasticsearch import AsyncElasticsearch
        except ImportError as exc:
            raise ImportError(
                "elasticsearch[async] is required for Elasticsearch support. "
                "Install it with: pip install elasticsearch[async]"
            ) from exc

        client: Any = None
        try:
            cfg = self.config
            client_kwargs: dict[str, Any] = {
                "request_timeout": cfg.request_timeout,
            }

            # A cloud ID replaces the host list entirely.
            if cfg.cloud_id:
                client_kwargs["cloud_id"] = cfg.cloud_id
            else:
                client_kwargs["hosts"] = cfg.hosts

            # Only one credential type is sent; api_key wins, then basic
            # auth (both parts required), then bearer token.
            if cfg.api_key:
                client_kwargs["api_key"] = cfg.api_key
            elif cfg.username and cfg.password:
                client_kwargs["basic_auth"] = (cfg.username, cfg.password)
            elif cfg.bearer_token:
                client_kwargs["bearer_auth"] = cfg.bearer_token

            if cfg.ca_certs:
                client_kwargs["ca_certs"] = cfg.ca_certs
            # Always forwarded so verify_certs=False is honoured even when
            # no CA bundle is configured.
            client_kwargs["verify_certs"] = cfg.verify_certs

            client = AsyncElasticsearch(**client_kwargs)
            self._client = client

            # Test connection
            await client.info()

            # Mark the source connected BEFORE pre-fetching the schema.
            # get_schema_async() calls connect_async() when _is_connected is
            # False, which would wait on self._lock while this task holds it.
            # NOTE(review): assumes self._lock is a non-reentrant
            # asyncio.Lock defined by the base class - confirm.
            self._is_connected = True
            self._cached_schema = await self.get_schema_async()

        except Exception as e:
            # Roll back so a later call can retry from a clean state, and
            # release the client's resources instead of leaking them.
            self._is_connected = False
            self._client = None
            if client is not None:
                try:
                    await client.close()
                except Exception:
                    # Best-effort cleanup; the original failure is the
                    # error worth reporting.
                    pass
            raise ElasticsearchConnectionError(
                str(e), hosts=self.config.hosts
            ) from e
374
+
375
async def disconnect_async(self) -> None:
    """Close the Elasticsearch client if a connection is open.

    Does nothing when not connected. The connected flag is checked
    again after acquiring ``self._lock``.
    """
    if self._is_connected:
        async with self._lock:
            if self._is_connected:
                active_client = self._client
                if active_client:
                    await active_client.close()
                    self._client = None

                self._is_connected = False
389
+
390
async def validate_connection_async(self) -> bool:
    """Check whether the cluster answers a ping.

    Connects first when not yet connected. Any exception raised while
    connecting or pinging is reported as ``False`` rather than propagated.

    Returns:
        The result of the client's ``ping()``, or ``False`` on any error.
    """
    try:
        if not self._is_connected:
            await self.connect_async()

        return await self._client.ping()
    except Exception:
        return False
404
+
405
+ # -------------------------------------------------------------------------
406
+ # Schema from Mapping
407
+ # -------------------------------------------------------------------------
408
+
409
async def get_schema_async(self) -> dict[str, ColumnType]:
    """Return the schema derived from the index mapping, cached after first use.

    When the configured index matches several indices, the properties of
    every returned index are merged into one schema dict.

    Returns:
        Column name to type mapping.
    """
    cached = self._cached_schema
    if cached is not None:
        return cached

    if not self._is_connected:
        await self.connect_async()

    # Get index mapping
    target_index = self.config.index
    mapping_response = await self._client.indices.get_mapping(
        index=target_index
    )

    # Fold the properties of each returned index into a single schema.
    schema: dict[str, ColumnType] = {}
    for _index_name, index_data in mapping_response.items():
        properties = index_data.get("mappings", {}).get("properties", {})
        self._parse_mapping_properties(properties, "", schema)

    self._cached_schema = schema
    return schema
436
+
437
def _parse_mapping_properties(
    self,
    properties: dict[str, Any],
    prefix: str,
    schema: dict[str, ColumnType],
) -> None:
    """Walk mapping properties and record each field's column type.

    Fields that carry their own ``properties`` are either expanded
    recursively under a dotted name (when ``config.flatten_nested`` is
    true) or recorded once as ``ColumnType.STRUCT``. Other fields are
    looked up in ``ES_TYPE_MAPPING``; a missing ``type`` is treated as
    ``"object"`` and unmapped types become ``ColumnType.UNKNOWN``.

    Args:
        properties: Mapping properties.
        prefix: Dotted path of the parent field ("" at the top level).
        schema: Schema dict to populate in place.
    """
    for field_name, field_def in properties.items():
        full_name = field_name if not prefix else f"{prefix}.{field_name}"

        # Leaf field: translate its Elasticsearch type directly.
        if "properties" not in field_def:
            es_type = field_def.get("type", "object")
            schema[full_name] = self.ES_TYPE_MAPPING.get(
                es_type, ColumnType.UNKNOWN
            )
            continue

        # Field with sub-properties: expand or keep as one struct column.
        if self.config.flatten_nested:
            self._parse_mapping_properties(
                field_def["properties"], full_name, schema
            )
        else:
            schema[full_name] = ColumnType.STRUCT
467
+
468
+ # -------------------------------------------------------------------------
469
+ # Document Operations
470
+ # -------------------------------------------------------------------------
471
+
472
async def _fetch_sample_documents(
    self, n: int
) -> list[dict[str, Any]]:
    """Fetch up to ``n`` documents with a ``match_all`` search.

    Connects first when not yet connected. The ``_source`` of each hit is
    returned as-is.

    Args:
        n: Number of documents to sample.

    Returns:
        List of sample documents.
    """
    if not self._is_connected:
        await self.connect_async()

    match_everything = {"match_all": {}}
    response = await self._client.search(
        index=self.config.index,
        size=n,
        query=match_everything,
    )

    sources: list[dict[str, Any]] = []
    for hit in response.get("hits", {}).get("hits", []):
        sources.append(hit["_source"])
    return sources
494
+
495
async def _fetch_documents(
    self,
    filter: dict[str, Any] | None = None,
    limit: int | None = None,
    skip: int | None = None,
) -> list[dict[str, Any]]:
    """Fetch documents with a single search or, when needed, a scroll.

    A single search is used when the effective limit is at most 10000 and
    no ``skip`` is requested; every other case is delegated to
    ``_scroll_documents``.

    Args:
        filter: Elasticsearch query; a falsy value means ``match_all``.
        limit: Maximum documents to return; a falsy value falls back to
            ``config.max_documents``.
        skip: Number of documents to skip.

    Returns:
        List of documents, each passed through ``_normalize_document``.
    """
    if not self._is_connected:
        await self.connect_async()

    effective_limit = limit or self.config.max_documents
    es_query = filter or {"match_all": {}}

    # NOTE(review): 10000 presumably mirrors Elasticsearch's default
    # index.max_result_window - confirm.
    use_plain_search = effective_limit <= 10000 and not skip
    if not use_plain_search:
        # For large result sets, use scroll
        return await self._scroll_documents(es_query, effective_limit, skip)

    # For small result sets, use regular search
    response = await self._client.search(
        index=self.config.index,
        size=effective_limit,
        query=es_query,
        track_total_hits=self.config.track_total_hits,
    )
    normalized: list[dict[str, Any]] = []
    for hit in response.get("hits", {}).get("hits", []):
        normalized.append(self._normalize_document(hit["_source"]))
    return normalized
530
+
531
+ async def _scroll_documents(
532
+ self,
533
+ query: dict[str, Any],
534
+ limit: int,
535
+ skip: int | None = None,
536
+ ) -> list[dict[str, Any]]:
537
+ """Fetch documents using scroll API.
538
+
539
+ Args:
540
+ query: Elasticsearch query.
541
+ limit: Maximum documents.
542
+ skip: Documents to skip.
543
+
544
+ Returns:
545
+ List of documents.
546
+ """
547
+ documents: list[dict[str, Any]] = []
548
+ scroll_id = None
549
+ fetched = 0
550
+ skipped = 0
551
+
552
+ try:
553
+ # Initial search
554
+ response = await self._client.search(
555
+ index=self.config.index,
556
+ scroll=self.config.scroll_timeout,
557
+ size=self.config.scroll_size,
558
+ query=query,
559
+ track_total_hits=self.config.track_total_hits,
560
+ )
561
+
562
+ while True:
563
+ hits = response.get("hits", {}).get("hits", [])
564
+ if not hits:
565
+ break
566
+
567
+ scroll_id = response.get("_scroll_id")
568
+
569
+ for hit in hits:
570
+ if skip and skipped < skip:
571
+ skipped += 1
572
+ continue
573
+
574
+ documents.append(
575
+ self._normalize_document(hit["_source"])
576
+ )
577
+ fetched += 1
578
+
579
+ if fetched >= limit:
580
+ break
581
+
582
+ if fetched >= limit:
583
+ break
584
+
585
+ # Continue scrolling
586
+ response = await self._client.scroll(
587
+ scroll_id=scroll_id,
588
+ scroll=self.config.scroll_timeout,
589
+ )
590
+
591
+ finally:
592
+ # Clear scroll context
593
+ if scroll_id:
594
+ try:
595
+ await self._client.clear_scroll(scroll_id=scroll_id)
596
+ except Exception:
597
+ pass # Best effort cleanup
598
+
599
+ return documents
600
+
601
+ async def _count_documents(
602
+ self, filter: dict[str, Any] | None = None
603
+ ) -> int:
604
+ """Count documents in index.
605
+
606
+ Args:
607
+ filter: Optional query filter.
608
+
609
+ Returns:
610
+ Document count.
611
+ """
612
+ if not self._is_connected:
613
+ await self.connect_async()
614
+
615
+ query = filter or {"match_all": {}}
616
+ response = await self._client.count(
617
+ index=self.config.index,
618
+ query=query,
619
+ )
620
+ return response.get("count", 0)
621
+
622
def _normalize_document(self, doc: dict[str, Any]) -> dict[str, Any]:
    """Prepare a single Elasticsearch document for downstream use.

    Args:
        doc: Elasticsearch document (the ``_source`` payload of a hit).

    Returns:
        The document flattened by the schema inferrer when
        ``config.flatten_nested`` is set; otherwise the same object,
        untouched.
    """
    # Without flattening there is nothing to do: the document is handed
    # back unchanged.
    if not self.config.flatten_nested:
        return doc
    return self._schema_inferrer.flatten_document(doc)
636
+
637
+ # -------------------------------------------------------------------------
638
+ # Search and Aggregation
639
+ # -------------------------------------------------------------------------
640
+
641
+ async def search_async(
642
+ self,
643
+ query: dict[str, Any] | None = None,
644
+ size: int = 10,
645
+ sort: list[dict[str, Any]] | None = None,
646
+ source: list[str] | bool | None = None,
647
+ **kwargs: Any,
648
+ ) -> list[dict[str, Any]]:
649
+ """Execute search query.
650
+
651
+ Args:
652
+ query: Elasticsearch query DSL.
653
+ size: Number of results.
654
+ sort: Sort specification.
655
+ source: Fields to include/exclude.
656
+ **kwargs: Additional search parameters.
657
+
658
+ Returns:
659
+ List of matching documents.
660
+
661
+ Example:
662
+ >>> results = await source.search_async(
663
+ ... query={"match": {"title": "python"}},
664
+ ... size=20,
665
+ ... sort=[{"date": "desc"}],
666
+ ... )
667
+ """
668
+ if not self._is_connected:
669
+ await self.connect_async()
670
+
671
+ search_kwargs: dict[str, Any] = {
672
+ "index": self.config.index,
673
+ "size": size,
674
+ "query": query or {"match_all": {}},
675
+ }
676
+
677
+ if sort:
678
+ search_kwargs["sort"] = sort
679
+ if source is not None:
680
+ search_kwargs["source"] = source
681
+ search_kwargs.update(kwargs)
682
+
683
+ response = await self._client.search(**search_kwargs)
684
+ hits = response.get("hits", {}).get("hits", [])
685
+ return [hit["_source"] for hit in hits]
686
+
687
+ async def aggregate_async(
688
+ self,
689
+ aggs: dict[str, Any],
690
+ query: dict[str, Any] | None = None,
691
+ size: int = 0,
692
+ ) -> dict[str, Any]:
693
+ """Execute aggregation.
694
+
695
+ Args:
696
+ aggs: Aggregation specification.
697
+ query: Optional query filter.
698
+ size: Number of hits to return (0 for aggs only).
699
+
700
+ Returns:
701
+ Aggregation results.
702
+
703
+ Example:
704
+ >>> aggs_result = await source.aggregate_async(
705
+ ... aggs={
706
+ ... "status_counts": {
707
+ ... "terms": {"field": "status"}
708
+ ... },
709
+ ... "avg_price": {
710
+ ... "avg": {"field": "price"}
711
+ ... },
712
+ ... },
713
+ ... query={"range": {"date": {"gte": "2024-01-01"}}},
714
+ ... )
715
+ """
716
+ if not self._is_connected:
717
+ await self.connect_async()
718
+
719
+ response = await self._client.search(
720
+ index=self.config.index,
721
+ size=size,
722
+ query=query or {"match_all": {}},
723
+ aggs=aggs,
724
+ )
725
+ return response.get("aggregations", {})
726
+
727
+ # -------------------------------------------------------------------------
728
+ # Sampling
729
+ # -------------------------------------------------------------------------
730
+
731
async def sample_async(
    self, n: int = 1000, seed: int | None = None
) -> "ElasticsearchDataSource":
    """Create a data source limited to at most ``n`` documents.

    No query is executed here and no random scoring is applied: the
    "sample" is simply a new data source over the same index whose
    ``max_documents`` is capped at ``n``.

    Args:
        n: Maximum number of documents the new source will read. Also
            caps its ``schema_sample_size``.
        seed: Accepted for interface compatibility; currently unused, so
            it has no effect on which documents are returned.

    Returns:
        New ElasticsearchDataSource with sampled configuration.
    """
    # NOTE(review): only the connection/index fields listed below are
    # carried over; any other option on ``self.config`` falls back to the
    # ElasticsearchConfig defaults in the new source — confirm that is
    # intended.
    config = ElasticsearchConfig(
        hosts=self.config.hosts,
        cloud_id=self.config.cloud_id,
        api_key=self.config.api_key,
        username=self.config.username,
        password=self.config.password,
        index=self.config.index,
        name=f"{self.name}_sample",
        max_documents=n,
        # Never sample more documents for the schema than the source holds.
        schema_sample_size=min(n, self.config.schema_sample_size),
    )
    return ElasticsearchDataSource(config)
758
+
759
+ # -------------------------------------------------------------------------
760
+ # Index Information
761
+ # -------------------------------------------------------------------------
762
+
763
async def get_index_info_async(self) -> dict[str, Any]:
    """Collect the settings and stats of the configured index.

    Connects lazily if no connection is open yet, then issues the
    settings request followed by the stats request.

    Returns:
        A dict with the raw responses under the keys ``"settings"`` and
        ``"stats"``.
    """
    if not self._is_connected:
        await self.connect_async()

    index_name = self.config.index
    indices_api = self._client.indices

    index_settings = await indices_api.get_settings(index=index_name)
    index_stats = await indices_api.stats(index=index_name)

    info: dict[str, Any] = {}
    info["settings"] = index_settings
    info["stats"] = index_stats
    return info
783
+
784
+ async def get_field_caps_async(
785
+ self, fields: list[str] | None = None
786
+ ) -> dict[str, Any]:
787
+ """Get field capabilities.
788
+
789
+ Args:
790
+ fields: Fields to get capabilities for.
791
+
792
+ Returns:
793
+ Field capabilities.
794
+ """
795
+ if not self._is_connected:
796
+ await self.connect_async()
797
+
798
+ return await self._client.field_caps(
799
+ index=self.config.index,
800
+ fields=fields or ["*"],
801
+ )