truthound-1.0.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877):
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,719 @@
1
+ """SGD-based online learning for memory-efficient model training.
2
+
3
+ This module provides incremental/online learning implementations for
4
+ algorithms that traditionally require O(n²) or O(n³) memory for training.
5
+
6
+ Key Algorithms:
7
+ - SGDOneClassSVM: Online One-Class SVM using SGD
8
+ - IncrementalPCA: Streaming PCA for dimensionality reduction
9
+ - OnlineIsolationForest: Incremental tree building
10
+
11
+ Memory Complexity:
12
+ - Traditional SVM: O(n²) for kernel matrix
13
+ - SGD SVM: O(1) per sample, O(d) for model weights
14
+
15
+ Usage:
16
+ class MemoryEfficientSVM(AnomalyValidator, SGDOnlineMixin):
17
+ def validate(self, lf):
18
+ # Stream data through online learner
19
+ model = self.create_online_svm()
20
+ for chunk in self.iterate_chunks(lf):
21
+ model.partial_fit(chunk)
22
+
23
+ # Predict on new data
24
+ predictions = model.predict(current_data)
25
+ """
26
+
27
+ from __future__ import annotations
28
+
29
+ from abc import ABC, abstractmethod
30
+ from dataclasses import dataclass, field
31
+ from enum import Enum, auto
32
+ from typing import Any, Iterator, Protocol, TYPE_CHECKING
33
+ import warnings
34
+
35
+ import numpy as np
36
+
37
+ if TYPE_CHECKING:
38
+ import polars as pl
39
+
40
+
41
class OnlineLearnerType(Enum):
    """Enumerates the online learning algorithms this module distinguishes."""

    # Values are written out explicitly; they are the same consecutive
    # integers (starting at 1, in declaration order) that ``auto()`` assigns.
    SGD_SVM = 1  # One-Class SVM trained with SGD
    INCREMENTAL_PCA = 2  # PCA computed over a stream
    MINI_BATCH_KMEANS = 3  # K-Means fitted on mini-batches
    ONLINE_COVARIANCE = 4  # Covariance estimated over a stream
48
+
49
+
50
@dataclass
class OnlineLearnerConfig:
    """Settings bundle for the online learning algorithms.

    Attributes:
        learning_rate: Starting learning rate.
        learning_rate_schedule: Name of the schedule
            ('constant', 'optimal', 'invscaling').
        n_iterations: How many passes to make over the data.
        batch_size: Size of each mini-batch handed to partial_fit.
        regularization: Strength of the L2 penalty.
        random_state: Seed for random number generation.
        warm_start: Continue from the previous fit when True.
        tol: Convergence tolerance.
        nu: Upper bound on the outlier fraction (SVM-specific).
        kernel_approx: Kernel approximation method (SVM-specific).
        n_components: Number of kernel components (SVM-specific).
    """

    # --- General optimisation settings ---
    learning_rate: float = 1e-3
    learning_rate_schedule: str = "optimal"
    n_iterations: int = 5
    batch_size: int = 1000
    regularization: float = 1e-4
    random_state: int = 42
    warm_start: bool = True
    tol: float = 0.0001

    # --- One-Class SVM settings ---
    nu: float = 0.1
    kernel_approx: str = "nystroem"
    n_components: int = 100
78
+
79
+
80
class IncrementalModel(Protocol):
    """Structural interface for models that can be trained batch by batch."""

    def partial_fit(self, X: np.ndarray) -> "IncrementalModel":
        """Train on one batch ``X``; implementations return an IncrementalModel."""

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Produce predictions for ``X``."""
90
+
91
+
92
class OnlineStatistics:
    """Running per-feature statistics (Welford's online algorithm).

    Tracks sample count, mean, sum of squared deviations (M2), minimum and
    maximum for each feature without storing the data, so memory use is a
    fixed number of length-``n_features`` arrays.

    Example:
        stats = OnlineStatistics(n_features=10)
        for batch in data_stream:
            stats.update(batch)
        mean, std = stats.mean, stats.std
    """

    def __init__(self, n_features: int):
        """Initialize online statistics tracker.

        Args:
            n_features: Number of features
        """
        self.n_features = n_features
        self.n_samples = 0
        self._mean = np.zeros(n_features)
        self._M2 = np.zeros(n_features)  # Sum of squared differences
        # +/-inf so the first observed value always replaces them.
        self._min = np.full(n_features, np.inf)
        self._max = np.full(n_features, -np.inf)

    def update(self, X: np.ndarray) -> None:
        """Update statistics one row at a time.

        Args:
            X: Data batch (n_samples, n_features); a 1-D array is treated
                as a single sample.
        """
        if X.ndim == 1:
            X = X.reshape(1, -1)

        if len(X) == 0:
            # Nothing to fold in; also avoids min()/max() on an empty axis.
            return

        for x in X:
            self.n_samples += 1
            delta = x - self._mean
            self._mean += delta / self.n_samples
            # Second delta uses the already-updated mean (Welford update).
            delta2 = x - self._mean
            self._M2 += delta * delta2

        # Extremes do not depend on row order, so take them once per batch.
        self._min = np.minimum(self._min, X.min(axis=0))
        self._max = np.maximum(self._max, X.max(axis=0))

    def update_batch(self, X: np.ndarray) -> None:
        """Fold a whole batch into the statistics in one vectorized step.

        Combines the batch mean/variance with the running values using the
        pairwise (parallel) update, which is more efficient than
        row-by-row updates for large batches. An empty batch is a no-op.

        Args:
            X: Data batch (n_samples, n_features); a 1-D array is treated
                as a single sample.
        """
        if X.ndim == 1:
            X = X.reshape(1, -1)

        n_batch = len(X)
        if n_batch == 0:
            # Without this guard the batch mean would be NaN and
            # X.min(axis=0) would raise on the zero-length axis.
            return

        batch_mean = X.mean(axis=0)
        batch_var = X.var(axis=0, ddof=0)

        # Combine with existing statistics
        if self.n_samples == 0:
            self._mean = batch_mean
            self._M2 = batch_var * n_batch
        else:
            n_total = self.n_samples + n_batch
            delta = batch_mean - self._mean

            self._mean = (self.n_samples * self._mean + n_batch * batch_mean) / n_total
            # Batch scatter plus the correction for the shift between means.
            self._M2 += batch_var * n_batch + delta**2 * self.n_samples * n_batch / n_total

        self.n_samples += n_batch
        self._min = np.minimum(self._min, X.min(axis=0))
        self._max = np.maximum(self._max, X.max(axis=0))

    @property
    def mean(self) -> np.ndarray:
        """Get a copy of the current mean."""
        return self._mean.copy()

    @property
    def variance(self) -> np.ndarray:
        """Get the current sample variance (divides by ``n_samples - 1``).

        Returns zeros while fewer than two samples have been seen.
        """
        if self.n_samples < 2:
            return np.zeros(self.n_features)
        return self._M2 / (self.n_samples - 1)

    @property
    def std(self) -> np.ndarray:
        """Get the current standard deviation (square root of ``variance``)."""
        return np.sqrt(self.variance)

    @property
    def min(self) -> np.ndarray:
        """Get a copy of the per-feature minimum values."""
        return self._min.copy()

    @property
    def max(self) -> np.ndarray:
        """Get a copy of the per-feature maximum values."""
        return self._max.copy()
189
+
190
+
191
class OnlineScaler:
    """Standardization scaler whose parameters are learned incrementally.

    Each ``partial_fit`` call folds a batch into running statistics;
    ``transform`` then centres and/or scales data with the running mean
    and standard deviation.

    Example:
        scaler = OnlineScaler()
        for batch in training_data:
            scaler.partial_fit(batch)
        scaled = scaler.transform(new_data)
    """

    def __init__(self, with_mean: bool = True, with_std: bool = True):
        """Create an unfitted scaler.

        Args:
            with_mean: Subtract the running mean when True
            with_std: Divide by the running std when True
        """
        self.with_mean = with_mean
        self.with_std = with_std
        # Created lazily on the first partial_fit, once the width is known.
        self._stats: OnlineStatistics | None = None

    def partial_fit(self, X: np.ndarray) -> "OnlineScaler":
        """Fold one batch into the running statistics.

        Args:
            X: Data batch (n_samples, n_features); a 1-D array counts as
                a single sample.

        Returns:
            self
        """
        batch = X.reshape(1, -1) if X.ndim == 1 else X

        if self._stats is None:
            self._stats = OnlineStatistics(batch.shape[1])

        self._stats.update_batch(batch)
        return self

    def transform(self, X: np.ndarray) -> np.ndarray:
        """Standardize ``X`` with the parameters learned so far.

        Args:
            X: Data to transform

        Returns:
            Transformed data (the input is not modified)

        Raises:
            RuntimeError: If partial_fit has never been called.
        """
        stats = self._stats
        if stats is None:
            raise RuntimeError("Scaler not fitted. Call partial_fit first.")

        result = X.copy()
        if self.with_mean:
            result = result - stats.mean
        if self.with_std:
            raw_std = stats.std
            # A zero std would divide by zero; leave such features unscaled.
            result = result / np.where(raw_std == 0, 1, raw_std)
        return result

    def fit_transform(self, X: np.ndarray) -> np.ndarray:
        """Update with ``X`` and return its transformed version."""
        self.partial_fit(X)
        transformed = self.transform(X)
        return transformed

    @property
    def mean_(self) -> np.ndarray:
        """Running mean; raises RuntimeError when unfitted."""
        stats = self._stats
        if stats is None:
            raise RuntimeError("Scaler not fitted")
        return stats.mean

    @property
    def scale_(self) -> np.ndarray:
        """Running standard deviation; raises RuntimeError when unfitted."""
        stats = self._stats
        if stats is None:
            raise RuntimeError("Scaler not fitted")
        return stats.std
272
+
273
+
274
+ class SGDOneClassSVM:
275
+ """SGD-based One-Class SVM for online anomaly detection.
276
+
277
+ This implementation uses:
278
+ 1. Kernel approximation (Nystroem or RBF Sampler) for scalability
279
+ 2. SGD optimization for online learning
280
+ 3. Linear SVM in the approximated feature space
281
+
282
+ Memory: O(n_components × n_features) instead of O(n_samples²)
283
+
284
+ Example:
285
+ model = SGDOneClassSVM(nu=0.05, n_components=100)
286
+ for batch in data_stream:
287
+ model.partial_fit(batch)
288
+ predictions = model.predict(test_data) # -1 for outliers
289
+ """
290
+
291
+ def __init__(
292
+ self,
293
+ nu: float = 0.1,
294
+ kernel_approx: str = "nystroem",
295
+ n_components: int = 100,
296
+ gamma: float | str = "scale",
297
+ learning_rate: str = "optimal",
298
+ eta0: float = 0.01,
299
+ random_state: int = 42,
300
+ max_iter: int = 1000,
301
+ tol: float = 1e-4,
302
+ ):
303
+ """Initialize SGD One-Class SVM.
304
+
305
+ Args:
306
+ nu: Upper bound on fraction of outliers (0 < nu <= 0.5)
307
+ kernel_approx: Kernel approximation ('nystroem' or 'rbf_sampler')
308
+ n_components: Number of kernel components
309
+ gamma: Kernel coefficient ('scale', 'auto', or float)
310
+ learning_rate: Learning rate schedule
311
+ eta0: Initial learning rate
312
+ random_state: Random seed
313
+ max_iter: Maximum iterations for SGD
314
+ tol: Tolerance for convergence
315
+ """
316
+ self.nu = nu
317
+ self.kernel_approx = kernel_approx
318
+ self.n_components = n_components
319
+ self.gamma = gamma
320
+ self.learning_rate = learning_rate
321
+ self.eta0 = eta0
322
+ self.random_state = random_state
323
+ self.max_iter = max_iter
324
+ self.tol = tol
325
+
326
+ self._kernel_transformer = None
327
+ self._sgd_classifier = None
328
+ self._scaler = None
329
+ self._is_fitted = False
330
+ self._n_features = None
331
+
332
+ def _init_models(self, X: np.ndarray) -> None:
333
+ """Initialize internal models on first fit."""
334
+ from sklearn.kernel_approximation import Nystroem, RBFSampler
335
+ from sklearn.linear_model import SGDClassifier
336
+
337
+ self._n_features = X.shape[1]
338
+
339
+ # Compute gamma if needed
340
+ gamma = self.gamma
341
+ if gamma == "scale":
342
+ gamma = 1.0 / (self._n_features * X.var())
343
+ elif gamma == "auto":
344
+ gamma = 1.0 / self._n_features
345
+
346
+ # Initialize kernel approximation
347
+ if self.kernel_approx == "nystroem":
348
+ self._kernel_transformer = Nystroem(
349
+ kernel="rbf",
350
+ gamma=gamma,
351
+ n_components=min(self.n_components, len(X)),
352
+ random_state=self.random_state,
353
+ )
354
+ else:
355
+ self._kernel_transformer = RBFSampler(
356
+ gamma=gamma,
357
+ n_components=self.n_components,
358
+ random_state=self.random_state,
359
+ )
360
+
361
+ # Initialize SGD classifier
362
+ # Use hinge loss for SVM-like behavior
363
+ self._sgd_classifier = SGDClassifier(
364
+ loss="hinge",
365
+ penalty="l2",
366
+ alpha=0.0001,
367
+ learning_rate=self.learning_rate,
368
+ eta0=self.eta0,
369
+ random_state=self.random_state,
370
+ max_iter=self.max_iter,
371
+ tol=self.tol,
372
+ warm_start=True,
373
+ )
374
+
375
+ # Initialize online scaler
376
+ self._scaler = OnlineScaler()
377
+
378
+ def partial_fit(self, X: np.ndarray) -> "SGDOneClassSVM":
379
+ """Incrementally fit the model on a batch.
380
+
381
+ For One-Class SVM, we generate synthetic outliers and train
382
+ a binary classifier to separate normal data from outliers.
383
+
384
+ Args:
385
+ X: Training data (n_samples, n_features)
386
+
387
+ Returns:
388
+ self
389
+ """
390
+ if X.ndim == 1:
391
+ X = X.reshape(1, -1)
392
+
393
+ # Initialize on first call
394
+ if not self._is_fitted:
395
+ self._init_models(X)
396
+ # Fit kernel transformer on first batch
397
+ self._kernel_transformer.fit(X)
398
+
399
+ # Update scaler
400
+ self._scaler.partial_fit(X)
401
+ X_scaled = self._scaler.transform(X)
402
+
403
+ # Transform to kernel space
404
+ X_kernel = self._kernel_transformer.transform(X_scaled)
405
+
406
+ # Generate synthetic outliers
407
+ n_outliers = max(1, int(len(X) * self.nu / (1 - self.nu)))
408
+ outliers = self._generate_outliers(X_scaled, n_outliers)
409
+ outliers_kernel = self._kernel_transformer.transform(outliers)
410
+
411
+ # Combine normal and outliers
412
+ X_combined = np.vstack([X_kernel, outliers_kernel])
413
+ y_combined = np.array([1] * len(X) + [-1] * n_outliers)
414
+
415
+ # Partial fit SGD classifier
416
+ self._sgd_classifier.partial_fit(X_combined, y_combined, classes=[-1, 1])
417
+ self._is_fitted = True
418
+
419
+ return self
420
+
421
+ def _generate_outliers(self, X: np.ndarray, n_outliers: int) -> np.ndarray:
422
+ """Generate synthetic outliers for training.
423
+
424
+ Uses uniform sampling in an expanded bounding box around the data.
425
+ """
426
+ rng = np.random.default_rng(self.random_state)
427
+
428
+ # Expand bounding box
429
+ min_vals = X.min(axis=0)
430
+ max_vals = X.max(axis=0)
431
+ ranges = max_vals - min_vals
432
+ ranges = np.where(ranges == 0, 1, ranges)
433
+
434
+ # Sample from expanded box
435
+ expansion = 1.5
436
+ outliers = rng.uniform(
437
+ min_vals - expansion * ranges,
438
+ max_vals + expansion * ranges,
439
+ size=(n_outliers, X.shape[1]),
440
+ )
441
+
442
+ return outliers
443
+
444
+ def predict(self, X: np.ndarray) -> np.ndarray:
445
+ """Predict if samples are outliers.
446
+
447
+ Args:
448
+ X: Test data (n_samples, n_features)
449
+
450
+ Returns:
451
+ Array of predictions: 1 for normal, -1 for outlier
452
+ """
453
+ if not self._is_fitted:
454
+ raise RuntimeError("Model not fitted. Call partial_fit first.")
455
+
456
+ if X.ndim == 1:
457
+ X = X.reshape(1, -1)
458
+
459
+ X_scaled = self._scaler.transform(X)
460
+ X_kernel = self._kernel_transformer.transform(X_scaled)
461
+
462
+ return self._sgd_classifier.predict(X_kernel)
463
+
464
+ def decision_function(self, X: np.ndarray) -> np.ndarray:
465
+ """Compute decision function values.
466
+
467
+ Args:
468
+ X: Test data
469
+
470
+ Returns:
471
+ Decision function values (positive = normal, negative = outlier)
472
+ """
473
+ if not self._is_fitted:
474
+ raise RuntimeError("Model not fitted. Call partial_fit first.")
475
+
476
+ if X.ndim == 1:
477
+ X = X.reshape(1, -1)
478
+
479
+ X_scaled = self._scaler.transform(X)
480
+ X_kernel = self._kernel_transformer.transform(X_scaled)
481
+
482
+ return self._sgd_classifier.decision_function(X_kernel)
483
+
484
+ def fit(self, X: np.ndarray) -> "SGDOneClassSVM":
485
+ """Fit the model on entire dataset at once.
486
+
487
+ For compatibility with sklearn API.
488
+ """
489
+ return self.partial_fit(X)
490
+
491
+
492
class IncrementalMahalanobis:
    """Incremental Mahalanobis distance computation.

    Maintains a running mean and a running scatter matrix (the sum of
    outer products of deviations from the mean) so that Mahalanobis
    distances can be computed without storing the training data.

    Memory: O(d²) for d features instead of O(n × d) for n samples.

    Example:
        detector = IncrementalMahalanobis()
        for batch in training_data:
            detector.partial_fit(batch)
        distances = detector.mahalanobis(test_data)
    """

    def __init__(self, regularization: float = 1e-6):
        """Initialize detector.

        Args:
            regularization: Value added to the diagonal of the covariance
                matrix before it is inverted.
        """
        self.regularization = regularization
        self._n_samples = 0
        self._mean = None  # running mean, shape (n_features,)
        self._cov_sum = None  # running scatter matrix, (n_features, n_features)
        self._inv_cov = None  # cached inverse of the regularized covariance

    def partial_fit(self, X: np.ndarray) -> "IncrementalMahalanobis":
        """Update the running mean and scatter matrix with a new batch.

        The batch statistics are merged into the running statistics with
        the pairwise update of Chan et al., so the result equals what a
        single pass over all samples seen so far would produce, however
        the data is split into batches.

        Args:
            X: Data batch (n_samples, n_features). A 1-D array is treated
                as a single sample.

        Returns:
            self
        """
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(1, -1)

        n_batch = len(X)
        if n_batch == 0:
            # Nothing to learn from; the mean of an empty batch is NaN and
            # would corrupt the running statistics.
            return self

        batch_mean = X.mean(axis=0)
        batch_centered = X - batch_mean
        batch_scatter = batch_centered.T @ batch_centered

        if self._n_samples == 0:
            self._mean = batch_mean
            self._cov_sum = batch_scatter
        else:
            total = self._n_samples + n_batch
            delta = batch_mean - self._mean
            # Merge the two scatter matrices. The outer-product term
            # accounts for the shift between the old mean and the batch
            # mean, which is lost if the batch is simply centred on the
            # updated mean.
            self._cov_sum = (
                self._cov_sum
                + batch_scatter
                + np.outer(delta, delta) * (self._n_samples * n_batch / total)
            )
            self._mean = self._mean + delta * (n_batch / total)

        self._n_samples += n_batch

        # Invalidate cached inverse
        self._inv_cov = None

        return self

    @property
    def covariance(self) -> np.ndarray:
        """Get current sample covariance matrix (n - 1 denominator).

        Raises:
            RuntimeError: If fewer than two samples have been seen.
        """
        if self._n_samples < 2:
            raise RuntimeError("Need at least 2 samples for covariance")
        return self._cov_sum / (self._n_samples - 1)

    def _compute_inverse_covariance(self) -> np.ndarray:
        """Compute and cache the inverse of the regularized covariance."""
        if self._inv_cov is None:
            cov = self.covariance
            # Add regularization to the diagonal before inverting
            cov_reg = cov + self.regularization * np.eye(cov.shape[0])
            self._inv_cov = np.linalg.inv(cov_reg)
        return self._inv_cov

    def mahalanobis(self, X: np.ndarray) -> np.ndarray:
        """Compute Mahalanobis distances.

        Args:
            X: Test data (n_samples, n_features). A 1-D array is treated
                as a single sample.

        Returns:
            Array of Mahalanobis distances, one per row of X

        Raises:
            RuntimeError: If fewer than two samples have been seen.
        """
        if self._n_samples < 2:
            raise RuntimeError("Model not fitted with enough samples")

        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X.reshape(1, -1)

        inv_cov = self._compute_inverse_covariance()
        centered = X - self._mean

        # Mahalanobis distance: sqrt((x-μ)ᵀ Σ⁻¹ (x-μ))
        left = centered @ inv_cov
        squared = np.sum(left * centered, axis=1)

        # Round-off can push a (near-)zero quadratic form slightly below
        # zero; clamp so the square root never yields NaN.
        return np.sqrt(np.maximum(squared, 0.0))

    def predict(self, X: np.ndarray, threshold: float = 3.0) -> np.ndarray:
        """Predict outliers based on Mahalanobis distance.

        Args:
            X: Test data
            threshold: Distance threshold; samples strictly farther than
                this are flagged as outliers.

        Returns:
            Array of predictions: 1 for normal, -1 for outlier
        """
        distances = self.mahalanobis(X)
        return np.where(distances > threshold, -1, 1)
604
+
605
+
606
class SGDOnlineMixin:
    """Mixin providing SGD-based online learning capabilities.

    This mixin enables memory-efficient training of models that
    traditionally require full data loading (SVM, covariance-based methods).

    Example:
        class MemoryEfficientSVMValidator(AnomalyValidator, SGDOnlineMixin):
            def validate(self, lf):
                # Create online SVM
                model = self.create_online_svm(nu=0.05)

                # Stream training data
                for chunk in self.iterate_training_chunks(lf):
                    model.partial_fit(chunk)

                # Predict
                predictions = model.predict(current_data)
    """

    # Both default to None at class level; the configuration is created
    # lazily per instance by get_online_config().
    _online_config: OnlineLearnerConfig | None = None
    _online_models: dict[str, IncrementalModel] | None = None

    @staticmethod
    def _resolve_override(value: Any, default: Any) -> Any:
        """Return ``value`` unless it is None, in which case return ``default``.

        Used instead of ``value or default`` so that explicit falsy
        arguments (for example ``0``) are honoured rather than silently
        replaced by the configured default.
        """
        return default if value is None else value

    def get_online_config(self) -> OnlineLearnerConfig:
        """Get online learning configuration, creating a default one on first use."""
        if self._online_config is None:
            self._online_config = OnlineLearnerConfig()
        return self._online_config

    def set_online_config(self, config: OnlineLearnerConfig) -> None:
        """Set online learning configuration."""
        self._online_config = config

    def create_online_svm(
        self,
        nu: float | None = None,
        n_components: int | None = None,
        **kwargs: Any,
    ) -> SGDOneClassSVM:
        """Create SGD-based One-Class SVM.

        Args:
            nu: Upper bound on outlier fraction. None uses the configured value.
            n_components: Number of kernel components. None uses the
                configured value.
            **kwargs: Additional parameters forwarded to SGDOneClassSVM

        Returns:
            SGDOneClassSVM instance
        """
        config = self.get_online_config()

        return SGDOneClassSVM(
            nu=self._resolve_override(nu, config.nu),
            n_components=self._resolve_override(n_components, config.n_components),
            kernel_approx=config.kernel_approx,
            learning_rate=config.learning_rate_schedule,
            eta0=config.learning_rate,
            random_state=config.random_state,
            tol=config.tol,
            **kwargs,
        )

    def create_online_scaler(self) -> OnlineScaler:
        """Create online standardization scaler."""
        return OnlineScaler()

    def create_online_statistics(self, n_features: int) -> OnlineStatistics:
        """Create online statistics tracker."""
        return OnlineStatistics(n_features)

    def create_mahalanobis_detector(
        self,
        regularization: float = 1e-6,
    ) -> IncrementalMahalanobis:
        """Create incremental Mahalanobis distance detector."""
        return IncrementalMahalanobis(regularization=regularization)

    def train_incrementally(
        self,
        lf: "pl.LazyFrame",
        columns: list[str],
        model: IncrementalModel,
        n_iterations: int | None = None,
    ) -> IncrementalModel:
        """Train model incrementally on streaming data.

        The frame is split into chunks of ``config.batch_size`` rows (rows
        with nulls dropped) and each chunk is passed to ``model.partial_fit``.

        Args:
            lf: Input LazyFrame
            columns: Columns to use
            model: Incremental model with partial_fit method
            n_iterations: Number of passes through data. None uses the
                configured value; an explicit 0 performs no passes.

        Returns:
            The same model object that was passed in
        """
        # Imported here rather than at module level, as in the original.
        from truthound.validators.memory.base import DataChunker

        config = self.get_online_config()
        n_iterations = self._resolve_override(n_iterations, config.n_iterations)

        chunker = DataChunker(
            chunk_size=config.batch_size,
            columns=columns,
            drop_nulls=True,
        )

        for iteration in range(n_iterations):
            for chunk_arr in chunker.iterate(lf, as_numpy=True):
                model.partial_fit(chunk_arr)

            # The host class may or may not provide a logger.
            if hasattr(self, "logger"):
                self.logger.debug(f"Completed iteration {iteration + 1}/{n_iterations}")

        return model