truthound 1.0.8__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (877)
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
truthound/ml/base.py ADDED
@@ -0,0 +1,1178 @@
1
+ """Base classes and core abstractions for ML-based validation.
2
+
3
+ This module provides the foundational abstractions for the ML system:
4
+ - MLModel: Abstract base class for all ML models
5
+ - ModelRegistry: Dynamic registration and management of ML models
6
+ - AnomalyDetector: Base class for anomaly detection models
7
+ - DriftDetector: Base class for drift detection models
8
+ - RuleLearner: Base class for rule learning models
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ from abc import ABC, abstractmethod
14
+ from dataclasses import dataclass, field
15
+ from datetime import datetime
16
+ from enum import Enum
17
+ from pathlib import Path
18
+ from typing import (
19
+ TYPE_CHECKING,
20
+ Any,
21
+ Callable,
22
+ Generic,
23
+ Iterator,
24
+ Protocol,
25
+ TypeVar,
26
+ runtime_checkable,
27
+ )
28
+ import threading
29
+ import json
30
+
31
+ import polars as pl
32
+
33
+ if TYPE_CHECKING:
34
+ from truthound.validators.base import ValidationIssue
35
+
36
+
37
+ # =============================================================================
38
+ # Enums
39
+ # =============================================================================
40
+
41
+
42
class ModelType(str, Enum):
    """Categories of ML model recognised by this module.

    Because the enum also derives from ``str``, each member compares equal
    to its raw string value (e.g. ``ModelType.CUSTOM == "custom"``).
    """

    # Detection / learning oriented model kinds.
    ANOMALY_DETECTOR = "anomaly_detector"
    DRIFT_DETECTOR = "drift_detector"
    RULE_LEARNER = "rule_learner"

    # Generic supervised model kinds.
    CLASSIFIER = "classifier"
    REGRESSOR = "regressor"

    # Anything that does not fit one of the categories above.
    CUSTOM = "custom"
51
+
52
+
53
class ModelState(str, Enum):
    """States an ML model can be in during its lifecycle.

    The enum derives from ``str`` so members compare equal to their raw
    string values. This class only names the states; which transitions are
    allowed is not defined here.
    """

    # Before / during / after training.
    UNTRAINED = "untrained"
    TRAINING = "training"
    TRAINED = "trained"

    # Post-training checks and availability.
    VALIDATING = "validating"
    READY = "ready"

    # Terminal or exceptional states.
    ERROR = "error"
    DEPRECATED = "deprecated"
63
+
64
+
65
class AnomalyType(str, Enum):
    """Kinds of anomaly a detector may report.

    The enum derives from ``str`` so members compare equal to their raw
    string values.
    """

    # A single data point is anomalous on its own.
    POINT = "point"
    # A value is anomalous only given its context.
    CONTEXTUAL = "contextual"
    # A group of data points is anomalous together.
    COLLECTIVE = "collective"
    # Pattern-based anomaly.
    PATTERN = "pattern"
    # Deviation from a trend.
    TREND = "trend"
    # Violation of a seasonal pattern.
    SEASONAL = "seasonal"
74
+
75
+
76
class SeverityLevel(str, Enum):
    """Severity labels attached to ML-detected issues.

    Members are declared from least to most severe. The enum derives from
    ``str`` so members compare equal to their raw string values; note that
    this means ordering comparisons are plain string comparisons, not
    severity comparisons.
    """

    INFO = "info"
    LOW = "low"
    MEDIUM = "medium"
    HIGH = "high"
    CRITICAL = "critical"
84
+
85
+
86
+ # =============================================================================
87
+ # Exceptions
88
+ # =============================================================================
89
+
90
+
91
class MLError(Exception):
    """Common base class for errors raised by the ML subsystem.

    Attributes:
        model_name: Name of the model the error relates to, or ``None`` if
            the caller did not supply one.
    """

    def __init__(self, message: str, model_name: str | None = None):
        """Create the error.

        Args:
            message: Human-readable description; becomes ``str(error)``.
            model_name: Optional name of the model involved.
        """
        super().__init__(message)
        self.model_name = model_name
97
+
98
+
99
class ModelNotTrainedError(MLError):
    """Signals an attempt to use a model that has not been trained.

    Adds no behaviour of its own; construction is inherited from ``MLError``.
    """
103
+
104
+
105
class ModelTrainingError(MLError):
    """Signals that training a model failed.

    Adds no behaviour of its own; construction is inherited from ``MLError``.
    """
109
+
110
+
111
class ModelLoadError(MLError):
    """Signals that loading a model failed.

    Adds no behaviour of its own; construction is inherited from ``MLError``.
    """
115
+
116
+
117
class InsufficientDataError(MLError):
    """Signals that there is too little data for an ML operation.

    Adds no behaviour of its own; construction is inherited from ``MLError``.
    """
121
+
122
+
123
+ # =============================================================================
124
+ # Configuration Classes
125
+ # =============================================================================
126
+
127
+
128
@dataclass
class MLConfig:
    """Settings shared by every ML operation; specialised configs extend it.

    Attributes:
        sample_size: Upper bound on the number of samples used for training;
            ``None`` (the default) sets no bound.
        random_seed: Seed used for reproducibility.
        n_jobs: Number of parallel jobs; ``-1`` means all cores.
        cache_predictions: Whether prediction results should be cached.
        verbose: Verbosity level (0 = silent, 1 = progress, 2 = debug).
        metadata: Arbitrary extra key/value data. Every instance receives
            its own fresh dict.
    """

    # Training / execution knobs.
    sample_size: int | None = None
    random_seed: int = 42
    n_jobs: int = 1

    # Runtime behaviour.
    cache_predictions: bool = True
    verbose: int = 0

    # default_factory avoids sharing one mutable dict between instances.
    metadata: dict[str, Any] = field(default_factory=dict)
146
+
147
+
148
@dataclass
class AnomalyConfig(MLConfig):
    """Settings for anomaly detection, on top of the ``MLConfig`` fields.

    The ranges below are documentation only; this class does not validate
    them.

    Attributes:
        contamination: Expected proportion of outliers (0.0 to 0.5).
        sensitivity: Detection sensitivity (0.0 to 1.0).
        min_samples: Minimum number of samples required for detection.
        window_size: Window size for temporal anomaly detection, or ``None``.
        score_threshold: Optional explicit score threshold, ``None`` by
            default. How detectors use it is not shown in this class.
        columns: Specific columns to analyze; ``None`` means all numeric
            columns.
    """

    # Expected outlier share and detector sensitivity.
    contamination: float = 0.1
    sensitivity: float = 0.5

    # Data-volume requirements.
    min_samples: int = 100
    window_size: int | None = None

    # Optional overrides.
    score_threshold: float | None = None
    columns: list[str] | None = None
166
+
167
+
168
@dataclass
class DriftConfig(MLConfig):
    """Settings for drift detection, on top of the ``MLConfig`` fields.

    Attributes:
        reference_window: Size of the reference window.
        detection_window: Size of the detection window.
        threshold: Drift detection threshold.
        min_samples_per_window: Minimum number of samples per window.
        detect_gradual: Flag for gradual drift detection (enabled by
            default).
        detect_sudden: Flag for sudden drift detection (enabled by default).
    """

    # Window sizing.
    reference_window: int = 1000
    detection_window: int = 100

    # Decision threshold and minimum data per window.
    threshold: float = 0.05
    min_samples_per_window: int = 30

    # Which kinds of drift to look for.
    detect_gradual: bool = True
    detect_sudden: bool = True
185
+
186
+
187
@dataclass
class RuleLearningConfig(MLConfig):
    """Settings for rule learning, on top of the ``MLConfig`` fields.

    Attributes:
        min_support: Minimum support a rule must have.
        min_confidence: Minimum confidence a rule must have.
        max_rules: Maximum number of rules to generate.
        max_antecedent_length: Maximum length of a rule's antecedent.
        include_negations: Flag for including negations (off by default).
            Its exact effect is determined by the learners, not this class.
    """

    # Quality thresholds for learned rules.
    min_support: float = 0.1
    min_confidence: float = 0.8

    # Limits on the size of the output.
    max_rules: int = 100
    max_antecedent_length: int = 3

    # Optional behaviour switch.
    include_negations: bool = False
203
+
204
+
205
+ ConfigT = TypeVar("ConfigT", bound=MLConfig)
206
+
207
+
208
+ # =============================================================================
209
+ # Result Classes
210
+ # =============================================================================
211
+
212
+
213
@dataclass(frozen=True)
class AnomalyScore:
    """Immutable anomaly verdict for one data point or window.

    Attributes:
        index: Index or identifier of the data point.
        score: Anomaly score; higher means more anomalous.
        is_anomaly: Whether the point was classified as an anomaly.
        anomaly_type: Kind of anomaly; defaults to ``AnomalyType.POINT``.
        confidence: Confidence in the classification; defaults to ``1.0``.
        contributing_features: Names of features contributing to the anomaly.
        details: Optional free-text explanation.
    """

    index: int | str
    score: float
    is_anomaly: bool
    anomaly_type: AnomalyType = AnomalyType.POINT
    confidence: float = 1.0
    contributing_features: tuple[str, ...] = field(default_factory=tuple)
    details: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of this score.

        ``score`` is rounded to 6 decimals and ``confidence`` to 4; the enum
        is replaced by its string value and the feature tuple by a list.
        """
        rounded_score = round(self.score, 6)
        rounded_confidence = round(self.confidence, 4)
        feature_names = list(self.contributing_features)
        return {
            "index": self.index,
            "score": rounded_score,
            "is_anomaly": self.is_anomaly,
            "anomaly_type": self.anomaly_type.value,
            "confidence": rounded_confidence,
            "contributing_features": feature_names,
            "details": self.details,
        }
244
+
245
+
246
@dataclass(frozen=True)
class AnomalyResult:
    """Immutable summary of one anomaly-detection run.

    Iterating over the result yields every entry of ``scores``.

    Attributes:
        scores: Individual anomaly scores.
        anomaly_count: Total number of anomalies detected.
        anomaly_ratio: Ratio of anomalies to total points.
        total_points: Number of points examined.
        model_name: Name of the model used.
        detection_time_ms: Time taken for detection, in milliseconds.
        threshold_used: Threshold applied, or ``None``.
        detected_at: Timestamp of the result; defaults to ``datetime.now()``
            at construction time.
    """

    scores: tuple[AnomalyScore, ...] = field(default_factory=tuple)
    anomaly_count: int = 0
    anomaly_ratio: float = 0.0
    total_points: int = 0
    model_name: str = ""
    detection_time_ms: float = 0.0
    threshold_used: float | None = None
    detected_at: datetime = field(default_factory=datetime.now)

    def __iter__(self) -> Iterator[AnomalyScore]:
        """Iterate over all scores, anomalous or not."""
        return iter(self.scores)

    def get_anomalies(self) -> tuple[AnomalyScore, ...]:
        """Return only the scores whose ``is_anomaly`` flag is truthy."""
        return tuple(filter(lambda entry: entry.is_anomaly, self.scores))

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of this result.

        Only anomalous scores are included (under ``"anomalies"``); the
        ratio is rounded to 4 decimals and the timing to 2.
        """
        serialized_anomalies = [entry.to_dict() for entry in self.get_anomalies()]
        timestamp = self.detected_at.isoformat()
        return {
            "anomaly_count": self.anomaly_count,
            "anomaly_ratio": round(self.anomaly_ratio, 4),
            "total_points": self.total_points,
            "model_name": self.model_name,
            "detection_time_ms": round(self.detection_time_ms, 2),
            "threshold_used": self.threshold_used,
            "detected_at": timestamp,
            "anomalies": serialized_anomalies,
        }
285
+
286
+
287
@dataclass(frozen=True)
class DriftResult:
    """Immutable result of a drift-detection analysis.

    Attributes:
        is_drifted: Whether drift was detected.
        drift_score: Overall drift score.
        column_scores: Per-column drift scores as ``(column, score)`` pairs.
        drift_type: Type of drift (gradual, sudden, etc.); ``"none"`` by
            default.
        p_value: Optional p-value of the test, or ``None`` if not available.
        confidence: Confidence in the result; defaults to ``1.0``.
        details: Optional free-text explanation.
        detected_at: Timestamp of the result; defaults to ``datetime.now()``
            at construction time.
    """

    is_drifted: bool = False
    drift_score: float = 0.0
    column_scores: tuple[tuple[str, float], ...] = field(default_factory=tuple)
    drift_type: str = "none"
    p_value: float | None = None
    confidence: float = 1.0
    details: str | None = None
    detected_at: datetime = field(default_factory=datetime.now)

    def get_drifted_columns(self, threshold: float = 0.5) -> list[str]:
        """Return the columns whose drift score is at or above ``threshold``.

        Args:
            threshold: Inclusive lower bound on the per-column score.

        Returns:
            Column names in the order they appear in ``column_scores``.
        """
        return [col for col, score in self.column_scores if score >= threshold]

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of this result.

        Scores and the p-value are rounded to 6 decimals, confidence to 4.
        ``p_value`` is emitted as ``None`` only when it is actually missing.
        """
        # Compare against None explicitly: a p-value of exactly 0.0 is a
        # valid (and highly significant) result and must not be dropped by a
        # truthiness test.
        p_value = round(self.p_value, 6) if self.p_value is not None else None
        return {
            "is_drifted": self.is_drifted,
            "drift_score": round(self.drift_score, 6),
            "drift_type": self.drift_type,
            "p_value": p_value,
            "confidence": round(self.confidence, 4),
            "column_scores": {col: round(score, 6) for col, score in self.column_scores},
            "details": self.details,
            "detected_at": self.detected_at.isoformat(),
        }
322
+
323
+
324
@dataclass(frozen=True)
class LearnedRule:
    """Immutable description of one validation rule derived from data.

    Attributes:
        name: Rule name/identifier.
        rule_type: Type of rule (e.g. 'range', 'pattern', 'constraint').
        column: Target column name, or a tuple of names for multi-column
            rules.
        condition: Rule condition expression.
        support: Proportion of the data supporting the rule.
        confidence: Rule confidence.
        validator_config: Extra keyword settings merged into the validator
            specification.
        description: Optional human-readable description.
    """

    name: str
    rule_type: str
    column: str | tuple[str, ...]
    condition: str
    support: float
    confidence: float
    validator_config: dict[str, Any] = field(default_factory=dict)
    description: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of this rule.

        A single column stays a string and a tuple of columns becomes a
        list; ``support`` and ``confidence`` are rounded to 4 decimals.
        """
        if isinstance(self.column, str):
            column_repr = self.column
        else:
            column_repr = list(self.column)
        return {
            "name": self.name,
            "rule_type": self.rule_type,
            "column": column_repr,
            "condition": self.condition,
            "support": round(self.support, 4),
            "confidence": round(self.confidence, 4),
            "validator_config": self.validator_config,
            "description": self.description,
        }

    def to_validator_spec(self) -> dict[str, Any]:
        """Convert to validator specification for use with Truthound.

        ``columns`` is always a list. Entries of ``validator_config`` are
        merged in last, so they override ``type``/``columns`` on key clash.
        """
        if isinstance(self.column, str):
            target_columns = [self.column]
        else:
            target_columns = list(self.column)
        spec: dict[str, Any] = {"type": self.rule_type, "columns": target_columns}
        spec.update(self.validator_config)
        return spec
365
+
366
+
367
@dataclass(frozen=True)
class RuleLearningResult:
    """Immutable outcome of a rule-learning run.

    Iterating over the result yields every entry of ``rules``.

    Attributes:
        rules: Learned validation rules.
        total_rules: Total number of rules.
        filtered_rules: Number of rules filtered out by
            min_confidence/support.
        learning_time_ms: Time taken for learning, in milliseconds.
        data_profile: Profile of the data used for learning.
        learned_at: Timestamp of the result; defaults to ``datetime.now()``
            at construction time.
    """

    rules: tuple[LearnedRule, ...] = field(default_factory=tuple)
    total_rules: int = 0
    filtered_rules: int = 0  # Rules filtered by min_confidence/support
    learning_time_ms: float = 0.0
    data_profile: dict[str, Any] = field(default_factory=dict)
    learned_at: datetime = field(default_factory=datetime.now)

    def __iter__(self) -> Iterator[LearnedRule]:
        """Iterate over the learned rules."""
        return iter(self.rules)

    def get_rules_by_type(self, rule_type: str) -> list[LearnedRule]:
        """Return the rules whose ``rule_type`` equals ``rule_type``."""
        matching: list[LearnedRule] = []
        for rule in self.rules:
            if rule.rule_type == rule_type:
                matching.append(rule)
        return matching

    def get_rules_for_column(self, column: str) -> list[LearnedRule]:
        """Return the rules that target ``column``.

        A string ``rule.column`` is treated as a single name (wrapped in a
        list), so the check is whole-name membership, not a substring test.
        """
        matching: list[LearnedRule] = []
        for rule in self.rules:
            if isinstance(rule.column, str):
                targets = [rule.column]
            else:
                targets = list(rule.column)
            if column in targets:
                matching.append(rule)
        return matching

    def to_validation_suite(self) -> dict[str, Any]:
        """Convert to validation suite format."""
        validator_specs = [rule.to_validator_spec() for rule in self.rules]
        return {
            "validators": validator_specs,
            "generated_at": self.learned_at.isoformat(),
            "total_rules": self.total_rules,
        }

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of this result.

        ``learning_time_ms`` is rounded to 2 decimals and every rule is
        serialized through its own ``to_dict``.
        """
        serialized_rules = [rule.to_dict() for rule in self.rules]
        return {
            "total_rules": self.total_rules,
            "filtered_rules": self.filtered_rules,
            "learning_time_ms": round(self.learning_time_ms, 2),
            "rules": serialized_rules,
            "data_profile": self.data_profile,
            "learned_at": self.learned_at.isoformat(),
        }
414
+
415
+
416
+ # =============================================================================
417
+ # Model Metadata
418
+ # =============================================================================
419
+
420
+
421
@dataclass(frozen=True)
class ModelInfo:
    """Immutable descriptive metadata for an ML model.

    Attributes:
        name: Unique model identifier.
        version: Model version.
        model_type: Type of model.
        description: Human-readable description.
        author: Model author.
        created_at: Creation timestamp; defaults to ``datetime.now()`` at
            construction time.
        input_features: Names of the input features.
        supports_incremental: Incremental-support flag (off by default).
        supports_online_learning: Online-learning flag (off by default).
        min_samples_required: Minimum number of samples required; defaults
            to 10.
        tags: Free-form tags.
    """

    # Identity.
    name: str
    version: str
    model_type: ModelType

    # Descriptive details.
    description: str = ""
    author: str = ""
    created_at: datetime = field(default_factory=datetime.now)
    input_features: tuple[str, ...] = field(default_factory=tuple)

    # Capabilities and requirements.
    supports_incremental: bool = False
    supports_online_learning: bool = False
    min_samples_required: int = 10
    tags: tuple[str, ...] = field(default_factory=tuple)

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of this metadata.

        The enum is replaced by its value, the timestamp by its ISO-8601
        string, and the tuples by lists.
        """
        type_value = self.model_type.value
        created_iso = self.created_at.isoformat()
        return {
            "name": self.name,
            "version": self.version,
            "model_type": type_value,
            "description": self.description,
            "author": self.author,
            "created_at": created_iso,
            "input_features": list(self.input_features),
            "supports_incremental": self.supports_incremental,
            "supports_online_learning": self.supports_online_learning,
            "min_samples_required": self.min_samples_required,
            "tags": list(self.tags),
        }
462
+
463
+
464
+ # =============================================================================
465
+ # Base ML Model Class
466
+ # =============================================================================
467
+
468
+
469
class MLModel(ABC, Generic[ConfigT]):
    """Abstract base class for all ML models.

    This provides the foundational interface for ML models in Truthound.
    Subclasses must implement the ``info`` property and the ``fit()`` and
    ``predict()`` methods.

    Example:
        class MyAnomalyModel(MLModel[AnomalyConfig]):
            @property
            def info(self) -> ModelInfo:
                return ModelInfo(
                    name="my-anomaly",
                    version="1.0.0",
                    model_type=ModelType.ANOMALY_DETECTOR,
                )

            def fit(self, data: pl.LazyFrame) -> None:
                # Train the model
                ...

            def predict(self, data: pl.LazyFrame) -> Any:
                # Make predictions
                ...
    """

    def __init__(self, config: ConfigT | None = None, **kwargs: Any):
        """Initialize the model.

        Args:
            config: Model configuration; when ``None`` the result of
                ``_default_config()`` is used
            **kwargs: Additional parameters that override config. Only keys
                that already exist as attributes on the config are applied;
                unknown keys are ignored.
        """
        # Explicit None check so a config object that happens to be falsy is
        # still honoured.
        self._config: ConfigT = (
            config if config is not None else self._default_config()
        )  # type: ignore
        self._state: ModelState = ModelState.UNTRAINED
        self._error: Exception | None = None
        self._trained_at: datetime | None = None
        self._training_samples: int = 0
        self._lock = threading.RLock()

        # Apply kwargs overrides
        overrides = {
            key: value
            for key, value in kwargs.items()
            if hasattr(self._config, key)
        }
        if overrides:
            import copy

            # Override on a private shallow copy: the caller's config object
            # may be shared with other models and must not be mutated.
            self._config = copy.copy(self._config)
            for key, value in overrides.items():
                # object.__setattr__ bypasses a custom/frozen __setattr__ on
                # the config class.
                object.__setattr__(self._config, key, value)

    @property
    @abstractmethod
    def info(self) -> ModelInfo:
        """Return model metadata.

        Returns:
            ModelInfo with model name, version, type, etc.
        """
        ...

    @abstractmethod
    def fit(self, data: pl.LazyFrame) -> None:
        """Train the model on data.

        Args:
            data: Training data as LazyFrame

        Raises:
            ModelTrainingError: If training fails
            InsufficientDataError: If not enough data
        """
        ...

    @abstractmethod
    def predict(self, data: pl.LazyFrame) -> Any:
        """Make predictions on new data.

        Args:
            data: Data to predict on

        Returns:
            Predictions (type depends on model)

        Raises:
            ModelNotTrainedError: If model not trained
        """
        ...

    def fit_predict(self, data: pl.LazyFrame) -> Any:
        """Train and predict in one step.

        Args:
            data: Data to train on and predict

        Returns:
            Predictions on the training data
        """
        self.fit(data)
        return self.predict(data)

    def partial_fit(self, data: pl.LazyFrame) -> None:
        """Incrementally update the model with new data.

        Override this for models that support online learning.

        Args:
            data: New data to learn from

        Raises:
            NotImplementedError: If ``info.supports_online_learning`` is false
        """
        if not self.info.supports_online_learning:
            raise NotImplementedError(
                f"{self.info.name} does not support online learning"
            )
        # Default implementation: just refit
        self.fit(data)

    def save(self, path: str | Path) -> None:
        """Save the model to disk as JSON.

        Parent directories are created as needed. Values that are not
        JSON-native are written via ``str()``.

        Args:
            path: Path to save the model

        Raises:
            ModelNotTrainedError: If model not trained
        """
        if self._state not in (ModelState.TRAINED, ModelState.READY):
            raise ModelNotTrainedError(
                "Cannot save untrained model",
                model_name=self.info.name,
            )

        path = Path(path)
        path.parent.mkdir(parents=True, exist_ok=True)

        model_data = self._serialize()
        # Explicit encoding keeps the file portable across platform defaults.
        with open(path, "w", encoding="utf-8") as f:
            json.dump(model_data, f, indent=2, default=str)

    def load(self, path: str | Path) -> None:
        """Load the model from disk.

        On success the state becomes ``ModelState.READY`` and any previously
        recorded error is cleared. On failure the state becomes
        ``ModelState.ERROR`` and the causing exception is kept in ``error``.

        Args:
            path: Path to load the model from

        Raises:
            ModelLoadError: If the file is missing or loading fails
        """
        path = Path(path)
        if not path.exists():
            raise ModelLoadError(
                f"Model file not found: {path}",
                model_name=self.info.name,
            )

        try:
            with open(path, encoding="utf-8") as f:
                model_data = json.load(f)
            self._deserialize(model_data)
            self._state = ModelState.READY
            # A successful load supersedes any earlier failure.
            self._error = None
        except Exception as e:
            self._state = ModelState.ERROR
            self._error = e
            raise ModelLoadError(
                f"Failed to load model: {e}",
                model_name=self.info.name,
            ) from e

    def _serialize(self) -> dict[str, Any]:
        """Serialize model state for saving.

        Override in subclasses to save model-specific state.
        """
        return {
            "info": self.info.to_dict(),
            "state": self._state.value,
            "trained_at": self._trained_at.isoformat() if self._trained_at else None,
            "training_samples": self._training_samples,
        }

    def _deserialize(self, data: dict[str, Any]) -> None:
        """Deserialize model state from saved data.

        Restores only ``trained_at`` and ``training_samples``. Override in
        subclasses to restore model-specific state.
        """
        self._trained_at = (
            datetime.fromisoformat(data["trained_at"])
            if data.get("trained_at")
            else None
        )
        self._training_samples = data.get("training_samples", 0)

    def _default_config(self) -> MLConfig:
        """Return default configuration.

        Override in subclasses with specific config types.
        """
        return MLConfig()

    def _validate_data(self, data: pl.LazyFrame, min_samples: int | None = None) -> int:
        """Validate input data and return row count.

        Collects the frame's row count, so this triggers query execution.

        Args:
            data: Data to validate
            min_samples: Minimum required samples; ``None`` falls back to
                ``info.min_samples_required``. An explicit ``0`` is honoured
                and disables the minimum.

        Returns:
            Number of rows in data

        Raises:
            InsufficientDataError: If not enough data
        """
        row_count = data.select(pl.len()).collect().item()
        # Compare against None rather than truthiness so that 0 is respected.
        min_required = (
            self.info.min_samples_required if min_samples is None else min_samples
        )

        if row_count < min_required:
            raise InsufficientDataError(
                f"Need at least {min_required} samples, got {row_count}",
                model_name=self.info.name,
            )

        return row_count

    def _maybe_sample(self, data: pl.LazyFrame) -> pl.LazyFrame:
        """Apply sampling if configured.

        Takes the first ``config.sample_size`` rows (``head``), not a random
        sample; returns ``data`` unchanged when no sample size is set.
        """
        if self._config.sample_size is not None:
            return data.head(self._config.sample_size)
        return data

    @property
    def config(self) -> ConfigT:
        """Get model configuration."""
        return self._config

    @property
    def state(self) -> ModelState:
        """Get current model state."""
        return self._state

    @property
    def is_trained(self) -> bool:
        """Check if model is trained and ready (state TRAINED or READY)."""
        return self._state in (ModelState.TRAINED, ModelState.READY)

    @property
    def error(self) -> Exception | None:
        """Get error if model is in error state."""
        return self._error

    @property
    def training_info(self) -> dict[str, Any]:
        """Get training information (timestamp, sample count, state)."""
        return {
            "trained_at": self._trained_at.isoformat() if self._trained_at else None,
            "training_samples": self._training_samples,
            "state": self._state.value,
        }

    def __repr__(self) -> str:
        return (
            f"<{self.__class__.__name__} "
            f"name={self.info.name!r} "
            f"state={self.state.value!r}>"
        )
728
+
729
+
730
+ # =============================================================================
731
+ # Specialized Base Classes
732
+ # =============================================================================
733
+
734
+
735
class AnomalyDetector(MLModel[AnomalyConfig]):
    """Abstract base class for anomaly detection models.

    Provides specialized interface for anomaly detection including
    score computation and threshold-based classification. Subclasses must
    implement ``fit()`` and ``score()``.
    """

    @property
    def info(self) -> ModelInfo:
        """Build model metadata from the overridable ``_get_*`` hooks."""
        return ModelInfo(
            name=self._get_model_name(),
            version=self._get_model_version(),
            model_type=ModelType.ANOMALY_DETECTOR,
            description=self._get_description(),
        )

    def _get_model_name(self) -> str:
        """Override to provide model name.

        Default: the lower-cased class name with "detector" removed.
        """
        return self.__class__.__name__.lower().replace("detector", "")

    def _get_model_version(self) -> str:
        """Override to provide model version."""
        return "1.0.0"

    def _get_description(self) -> str:
        """Override to provide description (defaults to the class docstring)."""
        return self.__class__.__doc__ or ""

    @abstractmethod
    def score(self, data: pl.LazyFrame) -> pl.Series:
        """Compute anomaly scores for data.

        Args:
            data: Data to score

        Returns:
            Series of anomaly scores (higher = more anomalous)
        """
        ...

    def predict(self, data: pl.LazyFrame) -> AnomalyResult:
        """Detect anomalies in data.

        Each score is flagged as anomalous when it is greater than or equal
        to the threshold from ``_get_threshold()``.

        Args:
            data: Data to analyze

        Returns:
            AnomalyResult with detected anomalies

        Raises:
            ModelNotTrainedError: If the model has not been trained
        """
        import time

        start = time.perf_counter()

        if not self.is_trained:
            raise ModelNotTrainedError(
                "Model must be trained before prediction",
                model_name=self.info.name,
            )

        scores = self.score(data)
        threshold = self._get_threshold()

        def _confidence(score: float) -> float:
            # With a non-positive threshold the ratio is meaningless, so
            # report full confidence.
            if threshold <= 0:
                return 1.0
            # Clamp to [0, 1]: a negative score must not yield a negative
            # confidence.
            return max(0.0, min(1.0, score / threshold))

        anomaly_scores = tuple(
            AnomalyScore(
                index=idx,
                score=score,
                is_anomaly=score >= threshold,
                anomaly_type=AnomalyType.POINT,
                confidence=_confidence(score),
            )
            for idx, score in enumerate(scores.to_list())
        )

        anomaly_count = sum(1 for s in anomaly_scores if s.is_anomaly)
        total_points = len(anomaly_scores)

        # perf_counter() is in seconds; the result field is in milliseconds.
        elapsed = (time.perf_counter() - start) * 1000

        return AnomalyResult(
            scores=anomaly_scores,
            anomaly_count=anomaly_count,
            anomaly_ratio=anomaly_count / total_points if total_points > 0 else 0.0,
            total_points=total_points,
            model_name=self.info.name,
            detection_time_ms=elapsed,
            threshold_used=threshold,
        )

    def _get_threshold(self) -> float:
        """Get the threshold for anomaly classification.

        Uses ``config.score_threshold`` when set; otherwise derives it as
        ``1.0 - config.contamination``.
        """
        if self.config.score_threshold is not None:
            return self.config.score_threshold
        # Default: use contamination to determine threshold
        return 1.0 - self.config.contamination

    def _default_config(self) -> AnomalyConfig:
        """Return a default ``AnomalyConfig``."""
        return AnomalyConfig()
833
+
834
+
835
class MLDriftDetector(MLModel[DriftConfig]):
    """Abstract base class for ML-based drift detection.

    Extends the statistical drift detection in truthound.drift
    with ML-based approaches. Subclasses must implement ``detect()``.
    """

    @property
    def info(self) -> ModelInfo:
        """Build model metadata from the overridable ``_get_*`` hooks."""
        return ModelInfo(
            name=self._get_model_name(),
            version=self._get_model_version(),
            model_type=ModelType.DRIFT_DETECTOR,
            description=self._get_description(),
            supports_incremental=True,
        )

    def _get_model_name(self) -> str:
        """Return the lower-cased class name with "detector" removed."""
        return self.__class__.__name__.lower().replace("detector", "")

    def _get_model_version(self) -> str:
        """Return the model version string."""
        return "1.0.0"

    def _get_description(self) -> str:
        """Return the class docstring, or an empty string if there is none."""
        return self.__class__.__doc__ or ""

    @abstractmethod
    def detect(
        self,
        reference: pl.LazyFrame,
        current: pl.LazyFrame,
        columns: list[str] | None = None,
    ) -> DriftResult:
        """Detect drift between reference and current data.

        Args:
            reference: Reference (baseline) data
            current: Current data to compare
            columns: Specific columns to check (None = all)

        Returns:
            DriftResult with drift analysis
        """
        ...

    def predict(self, data: pl.LazyFrame) -> DriftResult:
        """Predict drift using stored reference data.

        Requires that fit() was called to store reference data.

        Raises:
            ModelNotTrainedError: If the model is untrained or holds no
                reference data
        """
        # The reference frame only exists after fit(); a model that became
        # READY through load() has none, so look it up defensively instead
        # of failing with AttributeError.
        reference = getattr(self, "_reference_data", None)
        if not self.is_trained or reference is None:
            raise ModelNotTrainedError(
                "Model must be trained with reference data first",
                model_name=self.info.name,
            )
        return self.detect(reference, data)

    def fit(self, data: pl.LazyFrame) -> None:
        """Store reference data for drift detection.

        The stored frame is passed through ``_maybe_sample`` first, while
        the recorded sample count is the full validated row count.

        Args:
            data: Reference data to store

        Raises:
            ModelTrainingError: If validation or storing fails (the original
                exception is chained as the cause)
        """
        self._state = ModelState.TRAINING

        try:
            row_count = self._validate_data(data)
            self._reference_data = self._maybe_sample(data)
            self._training_samples = row_count
            self._trained_at = datetime.now()
            self._state = ModelState.TRAINED
            # A successful fit supersedes any earlier failure.
            self._error = None
        except Exception as e:
            self._state = ModelState.ERROR
            self._error = e
            raise ModelTrainingError(
                f"Failed to store reference data: {e}",
                model_name=self.info.name,
            ) from e

    def _default_config(self) -> DriftConfig:
        """Return a default ``DriftConfig``."""
        return DriftConfig()
919
+
920
+
921
class RuleLearner(MLModel[RuleLearningConfig]):
    """Abstract base class for rule learning models.

    Learns validation rules from data characteristics. Subclasses must
    implement ``learn_rules()``.
    """

    @property
    def info(self) -> ModelInfo:
        """Build model metadata from the overridable ``_get_*`` hooks."""
        return ModelInfo(
            name=self._get_model_name(),
            version=self._get_model_version(),
            model_type=ModelType.RULE_LEARNER,
            description=self._get_description(),
        )

    def _get_model_name(self) -> str:
        """Return the lower-cased class name with "learner" removed."""
        return self.__class__.__name__.lower().replace("learner", "")

    def _get_model_version(self) -> str:
        """Return the model version string."""
        return "1.0.0"

    def _get_description(self) -> str:
        """Return the class docstring, or an empty string if there is none."""
        return self.__class__.__doc__ or ""

    @abstractmethod
    def learn_rules(self, data: pl.LazyFrame) -> RuleLearningResult:
        """Learn validation rules from data.

        Args:
            data: Data to analyze

        Returns:
            RuleLearningResult with learned rules
        """
        ...

    def fit(self, data: pl.LazyFrame) -> None:
        """Learn rules from data and keep the result for ``predict()``.

        Validates the data, then delegates to ``learn_rules()``.

        Args:
            data: Training data

        Raises:
            ModelTrainingError: If validation or rule learning fails (the
                original exception is chained as the cause)
        """
        self._state = ModelState.TRAINING

        try:
            row_count = self._validate_data(data)
            self._learned_rules = self.learn_rules(data)
            self._training_samples = row_count
            self._trained_at = datetime.now()
            self._state = ModelState.TRAINED
            # A successful fit supersedes any earlier failure.
            self._error = None
        except Exception as e:
            self._state = ModelState.ERROR
            self._error = e
            raise ModelTrainingError(
                f"Failed to learn rules: {e}",
                model_name=self.info.name,
            ) from e

    def predict(self, data: pl.LazyFrame) -> RuleLearningResult:
        """Return learned rules (rules don't make predictions per se).

        ``data`` is accepted for interface compatibility and is not used.

        Raises:
            ModelNotTrainedError: If the model is untrained or holds no
                learned rules
        """
        # The result only exists after fit(); a model that became READY
        # through load() has none, so look it up defensively instead of
        # failing with AttributeError.
        learned = getattr(self, "_learned_rules", None)
        if not self.is_trained or learned is None:
            raise ModelNotTrainedError(
                "Model must be trained first",
                model_name=self.info.name,
            )
        return learned

    def get_rules(self) -> tuple[LearnedRule, ...]:
        """Get learned rules, or an empty tuple when none are available."""
        learned = getattr(self, "_learned_rules", None)
        if not self.is_trained or learned is None:
            return tuple()
        return learned.rules

    def _default_config(self) -> RuleLearningConfig:
        """Return a default ``RuleLearningConfig``."""
        return RuleLearningConfig()
999
+
1000
+
1001
+ # =============================================================================
1002
+ # Model Registry
1003
+ # =============================================================================
1004
+
1005
+
1006
class ModelRegistry:
    """Registry for ML model registration and discovery.

    Provides a centralized way to register and retrieve ML models.
    The class is a singleton: every ``ModelRegistry()`` call returns the
    same instance. Thread-safe for concurrent access.

    Example:
        registry = ModelRegistry()
        registry.register(IsolationForestDetector)

        # Later
        model_cls = registry.get("isolation_forest")
        model = model_cls()
    """

    _instance: "ModelRegistry | None" = None
    # Guards first-time creation of the singleton.
    _instance_lock = threading.Lock()

    def __new__(cls) -> "ModelRegistry":
        # Fast path without locking once the singleton exists; the second
        # check under the lock prevents two threads from both creating one.
        if cls._instance is None:
            with cls._instance_lock:
                if cls._instance is None:
                    instance = super().__new__(cls)
                    instance._models = {}  # name -> model class
                    instance._by_type = {}  # model type -> {name -> class}
                    instance._lock = threading.RLock()
                    instance._initialized = False
                    # Publish only after every attribute is set so other
                    # threads never observe a half-built registry.
                    cls._instance = instance
        return cls._instance

    def register(
        self,
        model_class: type[MLModel],
        name: str | None = None,
    ) -> None:
        """Register a model class.

        An existing entry with the same name is overwritten.

        Args:
            model_class: Model class to register
            name: Optional name override
        """
        with self._lock:
            # Get name from class if not provided. The probe instance is
            # created without running __init__, then given a default config
            # so that its ``info`` property can be read.
            instance = model_class.__new__(model_class)
            instance._config = instance._default_config()
            probe_info = instance.info
            model_name = name or probe_info.name
            model_type = probe_info.model_type

            self._models[model_name] = model_class
            self._by_type.setdefault(model_type, {})[model_name] = model_class

    def unregister(self, name: str) -> None:
        """Unregister a model.

        Unknown names are ignored.

        Args:
            name: Model name to unregister
        """
        with self._lock:
            if name not in self._models:
                return
            del self._models[name]
            # Remove from type index
            for type_dict in self._by_type.values():
                type_dict.pop(name, None)

    def get(self, name: str) -> type[MLModel]:
        """Get a registered model class by name.

        Args:
            name: Model name

        Returns:
            Model class

        Raises:
            KeyError: If model not found
        """
        with self._lock:
            if name not in self._models:
                raise KeyError(
                    f"Model '{name}' not found. "
                    f"Available: {list(self._models.keys())}"
                )
            return self._models[name]

    def get_by_type(self, model_type: ModelType) -> dict[str, type[MLModel]]:
        """Get all models of a specific type.

        Args:
            model_type: Type of models to retrieve

        Returns:
            Dict of model name to model class (a copy; mutating it does not
            affect the registry)
        """
        with self._lock:
            return dict(self._by_type.get(model_type, {}))

    def list_all(self) -> list[str]:
        """List all registered model names."""
        with self._lock:
            return list(self._models)

    def list_by_type(self, model_type: ModelType) -> list[str]:
        """List model names of a specific type."""
        with self._lock:
            return list(self._by_type.get(model_type, {}))

    def clear(self) -> None:
        """Clear all registered models."""
        with self._lock:
            self._models.clear()
            self._by_type.clear()
            self._initialized = False
1118
+
1119
+
1120
# Global registry instance used by the ``register_model`` decorator.
# ModelRegistry is a singleton, so ``ModelRegistry()`` called anywhere else
# returns this same object.
model_registry = ModelRegistry()
1122
+
1123
+
1124
def register_model(
    name: str | None = None,
) -> Callable[[type[MLModel]], type[MLModel]]:
    """Build a class decorator that adds a model to ``model_registry``.

    Args:
        name: Registration name; when ``None`` the registry derives the name
            from the model class itself

    Returns:
        A decorator that registers the class and returns it unchanged

    Example:
        @register_model("my_detector")
        class MyAnomalyDetector(AnomalyDetector):
            ...
    """

    def _register(model_cls: type[MLModel]) -> type[MLModel]:
        # Registration is a side effect only; the class is handed back
        # untouched so the decorated name still refers to it.
        model_registry.register(model_cls, name)
        return model_cls

    return _register
1138
+
1139
+
1140
+ # =============================================================================
1141
+ # Protocols
1142
+ # =============================================================================
1143
+
1144
+
1145
@runtime_checkable
class MLModelProtocol(Protocol):
    """Protocol for ML models (duck typing support).

    Because the protocol is runtime-checkable, ``isinstance`` verifies only
    that the members below are present, not their signatures.
    """

    @property
    def info(self) -> ModelInfo:
        """Model metadata."""

    def fit(self, data: pl.LazyFrame) -> None:
        """Train the model on ``data``."""

    def predict(self, data: pl.LazyFrame) -> Any:
        """Produce predictions for ``data``."""

    @property
    def is_trained(self) -> bool:
        """Whether the model has been trained."""
1158
+
1159
+
1160
@runtime_checkable
class AnomalyDetectorProtocol(Protocol):
    """Protocol for anomaly detectors.

    Because the protocol is runtime-checkable, ``isinstance`` verifies only
    that the methods below are present, not their signatures.
    """

    def score(self, data: pl.LazyFrame) -> pl.Series:
        """Compute anomaly scores for ``data``."""

    def predict(self, data: pl.LazyFrame) -> AnomalyResult:
        """Detect anomalies in ``data``."""
1167
+
1168
+
1169
@runtime_checkable
class DriftDetectorProtocol(Protocol):
    """Protocol for drift detectors."""

    def detect(
        self,
        reference: pl.LazyFrame,
        current: pl.LazyFrame,
        columns: list[str] | None = None,
    ) -> DriftResult:
        """Compare ``current`` against ``reference`` and report drift.

        ``columns`` optionally restricts the check; it defaults to ``None``.
        """