truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877) hide show
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,937 @@
1
+ """ML-based ReDoS Pattern Analysis.
2
+
3
+ This module provides machine learning-based analysis for predicting
4
+ ReDoS vulnerability risk in regex patterns. It uses feature extraction
5
+ and trained models to assess pattern safety.
6
+
7
+ Architecture:
8
+ ┌─────────────────────────────────────────────────────────────────┐
9
+ │ ML Pattern Analyzer │
10
+ └─────────────────────────────────────────────────────────────────┘
11
+
12
+ ┌───────────────┬───────────────┼───────────────┬─────────────────┐
13
+ │ │ │ │ │
14
+ ▼ ▼ ▼ ▼ ▼
15
+ ┌─────────┐ ┌─────────┐ ┌──────────┐ ┌──────────┐ ┌─────────┐
16
+ │ Feature │ │ Model │ │Prediction│ │ Training │ │ Model │
17
+ │Extractor│ │ Manager │ │ Pipeline │ │ Pipeline │ │ Storage │
18
+ └─────────┘ └─────────┘ └──────────┘ └──────────┘ └─────────┘
19
+
20
+ Features extracted:
21
+ - Structural features (length, depth, groups, etc.)
22
+ - Quantifier features (count, types, positions)
23
+ - Alternation features (count, complexity)
24
+ - Character class features (ranges, negation)
25
+ - Backtracking potential features
26
+
27
+ Usage:
28
+ from truthound.validators.security.redos.ml_analyzer import (
29
+ MLPatternAnalyzer,
30
+ predict_redos_risk,
31
+ )
32
+
33
+ # Quick prediction
34
+ result = predict_redos_risk(r"(a+)+b")
35
+ print(result.risk_probability) # 0.95
36
+ print(result.risk_level) # ReDoSRisk.CRITICAL
37
+
38
+ # Full analyzer with custom model
39
+ analyzer = MLPatternAnalyzer()
40
+ analyzer.train(training_patterns, labels)
41
+ result = analyzer.predict(pattern)
42
+ """
43
+
44
+ from __future__ import annotations
45
+
46
+ import json
47
+ import math
48
+ import re
49
+ from abc import ABC, abstractmethod
50
+ from dataclasses import dataclass, field
51
+ from pathlib import Path
52
+ from typing import Any, Protocol, Sequence
53
+
54
+ from truthound.validators.security.redos.core import ReDoSRisk
55
+
56
+
57
@dataclass
class PatternFeatures:
    """Extracted features from a regex pattern.

    These features are used for ML-based risk prediction.

    The declaration order of the fields below *is* the feature-vector order:
    ``to_vector()``, ``feature_names()`` and ``to_dict()`` are all derived
    from it, so adding, removing or moving a field changes the vector layout
    for every consumer at once.
    """

    # Structural features
    length: int = 0
    group_count: int = 0
    capture_group_count: int = 0
    non_capture_group_count: int = 0
    max_nesting_depth: int = 0
    alternation_count: int = 0

    # Quantifier features
    plus_count: int = 0
    star_count: int = 0
    question_count: int = 0
    bounded_quantifier_count: int = 0
    unbounded_quantifier_count: int = 0
    lazy_quantifier_count: int = 0
    possessive_quantifier_count: int = 0
    quantifier_density: float = 0.0

    # Dangerous pattern indicators
    nested_quantifier_count: int = 0
    adjacent_quantifier_count: int = 0
    quantified_alternation_count: int = 0
    quantified_backreference_count: int = 0

    # Character class features
    char_class_count: int = 0
    negated_char_class_count: int = 0
    dot_count: int = 0
    word_boundary_count: int = 0

    # Lookaround features
    lookahead_count: int = 0
    lookbehind_count: int = 0
    negative_lookaround_count: int = 0

    # Backreference features
    backreference_count: int = 0
    max_backreference_index: int = 0

    # Anchor features
    start_anchor: bool = False
    end_anchor: bool = False
    anchored: bool = False

    # Complexity metrics
    backtracking_potential: float = 0.0
    estimated_states: int = 0

    def to_vector(self) -> list[float]:
        """Convert features to a numeric vector for ML models.

        Returns:
            One float per field, in ``feature_names()`` order. Boolean
            fields become ``1.0`` / ``0.0``.
        """
        return [float(getattr(self, name)) for name in self.feature_names()]

    @classmethod
    def feature_names(cls) -> list[str]:
        """Get names of all features in vector order.

        Returns:
            The dataclass field names in declaration order.
        """
        # Function-scope import: the module header only imports
        # ``dataclass`` and ``field``.
        from dataclasses import fields

        return [spec.name for spec in fields(cls)]

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary.

        Returns:
            Mapping of feature name to its float value (the same values as
            ``to_vector()``), keyed in vector order.
        """
        return dict(zip(self.feature_names(), self.to_vector()))
190
+
191
+
192
class FeatureExtractor:
    """Extracts ML-relevant features from regex patterns.

    This extractor analyzes regex patterns and produces a feature vector
    suitable for machine learning models.

    All counts are regex/character scans over the raw pattern text; the
    pattern is never parsed, so they are heuristics rather than exact
    syntactic counts.

    Example:
        extractor = FeatureExtractor()
        features = extractor.extract(r"(a+)+b")
        print(features.nested_quantifier_count)  # 1
        print(features.backtracking_potential)   # high value
    """

    # Compiled patterns for feature extraction
    _PLUS_PATTERN = re.compile(r"(?<!\\)\+")
    _STAR_PATTERN = re.compile(r"(?<!\\)\*")
    _QUESTION_PATTERN = re.compile(r"(?<!\\)\?(?![=!<:])")
    _BOUNDED_QUANT_PATTERN = re.compile(r"\{(\d+)(?:,(\d*))?\}")
    _LAZY_QUANT_PATTERN = re.compile(r"[+*?]\?|\{[^}]+\}\?")
    _CHAR_CLASS_PATTERN = re.compile(r"\[[^\]]+\]")
    _NEGATED_CLASS_PATTERN = re.compile(r"\[\^[^\]]+\]")
    _LOOKAHEAD_PATTERN = re.compile(r"\(\?[=!]")
    _LOOKBEHIND_PATTERN = re.compile(r"\(\?<[=!]")
    _BACKREFERENCE_PATTERN = re.compile(r"\\([1-9]\d*)")
    _NESTED_QUANT_PATTERN = re.compile(r"\([^)]*[+*][^)]*\)[+*]")
    _ADJACENT_QUANT_PATTERN = re.compile(r"[+*][+*]")
    _QUANTIFIED_ALT_PATTERN = re.compile(r"\([^)]*\|[^)]*\)[+*?]")
    # A numeric backreference immediately followed by ``+``, ``*`` or a brace
    # quantifier. The repetition alternatives are grouped so that a brace
    # quantifier on its own (e.g. ``a{2,5}``) is NOT reported as a quantified
    # backreference.
    _QUANTIFIED_BACKREF_PATTERN = re.compile(r"\\[1-9]\d*(?:[+*]|\{[^}]+\})")
    _DOT_PATTERN = re.compile(r"(?<!\\)\.")
    _WORD_BOUNDARY_PATTERN = re.compile(r"\\b")
    _NON_CAPTURE_GROUP_PATTERN = re.compile(r"\(\?(?:[imsxLu]|:)")
    _CAPTURE_GROUP_PATTERN = re.compile(r"\((?!\?)")

    def extract(self, pattern: str) -> PatternFeatures:
        """Extract features from a regex pattern.

        Args:
            pattern: Regex pattern to analyze

        Returns:
            PatternFeatures with all extracted features. An empty pattern
            yields an all-default PatternFeatures.
        """
        features = PatternFeatures()

        if not pattern:
            return features

        # Structural features
        features.length = len(pattern)
        features.max_nesting_depth = self._calculate_nesting_depth(pattern)
        features.alternation_count = pattern.count("|")

        # Group counts
        features.capture_group_count = len(self._CAPTURE_GROUP_PATTERN.findall(pattern))
        features.non_capture_group_count = len(self._NON_CAPTURE_GROUP_PATTERN.findall(pattern))
        features.group_count = features.capture_group_count + features.non_capture_group_count

        # Quantifier features
        features.plus_count = len(self._PLUS_PATTERN.findall(pattern))
        features.star_count = len(self._STAR_PATTERN.findall(pattern))
        features.question_count = len(self._QUESTION_PATTERN.findall(pattern))
        features.lazy_quantifier_count = len(self._LAZY_QUANT_PATTERN.findall(pattern))
        features.bounded_quantifier_count = len(self._BOUNDED_QUANT_PATTERN.findall(pattern))

        # Unbounded quantifiers: "+", "*" and the open-ended "{n,}" form
        features.unbounded_quantifier_count = (
            features.plus_count
            + features.star_count
            + self._count_open_ended_bounds(pattern)
        )

        # Quantifier density
        total_quantifiers = (
            features.plus_count
            + features.star_count
            + features.question_count
            + features.bounded_quantifier_count
        )
        features.quantifier_density = total_quantifiers / max(features.length, 1)

        # Dangerous patterns
        features.nested_quantifier_count = len(self._NESTED_QUANT_PATTERN.findall(pattern))
        features.adjacent_quantifier_count = len(self._ADJACENT_QUANT_PATTERN.findall(pattern))
        features.quantified_alternation_count = len(self._QUANTIFIED_ALT_PATTERN.findall(pattern))
        features.quantified_backreference_count = len(
            self._QUANTIFIED_BACKREF_PATTERN.findall(pattern)
        )

        # Character class features
        features.char_class_count = len(self._CHAR_CLASS_PATTERN.findall(pattern))
        features.negated_char_class_count = len(self._NEGATED_CLASS_PATTERN.findall(pattern))
        features.dot_count = len(self._DOT_PATTERN.findall(pattern))
        features.word_boundary_count = len(self._WORD_BOUNDARY_PATTERN.findall(pattern))

        # Lookaround features
        features.lookahead_count = len(self._LOOKAHEAD_PATTERN.findall(pattern))
        features.lookbehind_count = len(self._LOOKBEHIND_PATTERN.findall(pattern))
        features.negative_lookaround_count = (
            pattern.count("(?!") + pattern.count("(?<!")
        )

        # Backreference features
        backref_matches = self._BACKREFERENCE_PATTERN.findall(pattern)
        features.backreference_count = len(backref_matches)
        if backref_matches:
            features.max_backreference_index = max(int(m) for m in backref_matches)

        # Anchor features
        features.start_anchor = pattern.startswith("^") or "\\A" in pattern
        features.end_anchor = pattern.endswith("$") or "\\Z" in pattern or "\\z" in pattern
        features.anchored = features.start_anchor and features.end_anchor

        # Complexity metrics (both are derived from the counts gathered above)
        features.backtracking_potential = self._calculate_backtracking_potential(features)
        features.estimated_states = self._estimate_nfa_states(features)

        return features

    def _count_open_ended_bounds(self, pattern: str) -> int:
        """Count brace quantifiers of the open-ended ``{n,}`` form.

        ``Match.group(2)`` is ``None`` when the optional ``,max`` part did not
        take part in the match (exact ``{n}``) and ``""`` only when a comma
        was present with no upper bound. ``findall`` reports both cases as
        ``""``, which is why matches are inspected individually here.
        """
        return sum(
            1
            for match in self._BOUNDED_QUANT_PATTERN.finditer(pattern)
            if match.group(2) == ""
        )

    def _calculate_nesting_depth(self, pattern: str) -> int:
        """Calculate maximum nesting depth of groups.

        This is a plain character scan: every "(" opens a level and every ")"
        closes one; escapes and character classes are not interpreted.
        """
        depth = 0
        max_depth = 0
        for char in pattern:
            if char == "(":
                depth += 1
                max_depth = max(max_depth, depth)
            elif char == ")":
                # Clamp so a stray ")" cannot drive the depth negative.
                depth = max(0, depth - 1)
        return max_depth

    def _calculate_backtracking_potential(self, features: PatternFeatures) -> float:
        """Estimate backtracking potential based on features.

        Higher values indicate higher risk of catastrophic backtracking.
        The score is a weighted sum of the risk indicators, scaled by 1.2
        when the pattern is not anchored at both ends, and capped at 100.0.
        """
        potential = 0.0

        # Nested quantifiers are the biggest risk
        potential += features.nested_quantifier_count * 50.0

        # Quantified alternation is also risky
        potential += features.quantified_alternation_count * 30.0

        # Adjacent quantifiers
        potential += features.adjacent_quantifier_count * 20.0

        # Unbounded quantifiers increase potential
        potential += features.unbounded_quantifier_count * 5.0

        # Deep nesting increases potential
        potential += features.max_nesting_depth * 3.0

        # Backreferences with quantifiers
        potential += features.quantified_backreference_count * 40.0

        # Lack of anchoring increases potential
        if not features.anchored:
            potential *= 1.2

        return min(potential, 100.0)

    def _estimate_nfa_states(self, features: PatternFeatures) -> int:
        """Estimate number of NFA states.

        This is a rough approximation based on pattern features.
        """
        # Base states from length
        states = features.length

        # Groups add states
        states += features.group_count * 2

        # Quantifiers add states
        states += features.plus_count * 2
        states += features.star_count * 2
        states += features.question_count

        # Bounded quantifiers can add many states
        states += features.bounded_quantifier_count * 5

        # Alternations add branch states
        states += features.alternation_count * 2

        return states
382
+
383
+
384
@dataclass
class MLPredictionResult:
    """Outcome of one ML-based ReDoS risk assessment.

    Attributes:
        pattern: The regex pattern that was analyzed
        features: Features extracted from the pattern
        risk_probability: Predicted probability of a ReDoS vulnerability
        risk_level: Categorical risk level
        confidence: How confident the model is in this prediction
        contributing_factors: (feature name, contribution) pairs for the
            features that most influenced the prediction
        model_version: Version string of the model that produced the result
    """

    pattern: str
    features: PatternFeatures
    risk_probability: float
    risk_level: ReDoSRisk
    confidence: float
    contributing_factors: list[tuple[str, float]] = field(default_factory=list)
    model_version: str = "1.0.0"

    def to_dict(self) -> dict[str, Any]:
        """Serialize the result into a plain dictionary.

        Probabilities, confidence and contributions are rounded to four
        decimal places; the risk level is emitted by its ``name``.
        """
        factor_rows = []
        for feature_name, contribution in self.contributing_factors:
            factor_rows.append(
                {"feature": feature_name, "contribution": round(contribution, 4)}
            )

        return dict(
            pattern=self.pattern,
            features=self.features.to_dict(),
            risk_probability=round(self.risk_probability, 4),
            risk_level=self.risk_level.name,
            confidence=round(self.confidence, 4),
            contributing_factors=factor_rows,
            model_version=self.model_version,
        )
420
+
421
+
422
class MLModelProtocol(Protocol):
    """Structural interface for models that score ReDoS risk.

    Any object providing these two methods with compatible signatures
    satisfies the protocol; no inheritance is required.
    """

    def predict(self, features: list[float]) -> tuple[float, float]:
        """Score a single feature vector.

        Args:
            features: Numeric feature vector for one pattern

        Returns:
            A ``(risk_probability, confidence)`` pair
        """
        ...

    def get_feature_importance(self) -> list[float]:
        """Return the model's feature importance scores."""
        ...
439
+
440
+
441
class RuleBasedModel:
    """Rule-based model for ReDoS risk prediction.

    Each known feature is multiplied by a hand-assigned weight, the
    products are added to a bias term, and the total is squashed through
    a logistic function to yield a probability. It serves as a baseline
    and as the fallback when no trained ML model is available.
    """

    # Feature weights (learned from known vulnerable patterns)
    FEATURE_WEIGHTS: dict[str, float] = {
        "nested_quantifier_count": 5.0,
        "quantified_backreference_count": 4.0,
        "quantified_alternation_count": 3.5,
        "adjacent_quantifier_count": 2.5,
        "unbounded_quantifier_count": 1.5,
        "max_nesting_depth": 0.8,
        "star_count": 0.5,
        "plus_count": 0.5,
        "alternation_count": 0.3,
        "quantifier_density": 2.0,
        "backtracking_potential": 0.1,  # Already composite
    }

    # Bias term
    BIAS = -2.0

    def __init__(self):
        """Initialize the rule-based model."""
        # Feature names give positional meaning to the vectors passed to predict().
        self._feature_names = PatternFeatures.feature_names()

    def predict(self, features: list[float]) -> tuple[float, float]:
        """Predict risk probability using the weighted rules.

        Args:
            features: Feature vector, positionally aligned with the feature
                names captured at construction time. Extra or missing
                trailing entries are ignored (``zip`` truncation).

        Returns:
            Tuple of (risk_probability, confidence). Confidence is
            ``abs(2 * probability - 1)``: 0 at probability 0.5 and 1 at
            either extreme.
        """
        # Map features to a dictionary for lookup by name
        feature_dict = dict(zip(self._feature_names, features))

        # Accumulate in FEATURE_WEIGHTS order, starting from the bias.
        weighted_sum = self.BIAS
        for feature_name, weight in self.FEATURE_WEIGHTS.items():
            if feature_name in feature_dict:
                weighted_sum += feature_dict[feature_name] * weight

        # Logistic function. math.exp overflows for strongly negative
        # scores (roughly below -709); the limit there is 0, so saturate
        # instead of letting OverflowError escape to the caller.
        try:
            probability = 1.0 / (1.0 + math.exp(-weighted_sum))
        except OverflowError:
            probability = 0.0

        # Confidence based on how extreme the score is
        confidence = abs(2 * probability - 1)

        return probability, confidence

    def get_feature_importance(self) -> list[float]:
        """Return the weight of every known feature, 0.0 for unweighted ones.

        The list is ordered like the feature names captured in ``__init__``.
        """
        return [self.FEATURE_WEIGHTS.get(name, 0.0) for name in self._feature_names]
506
+
507
+
508
class EnsembleModel:
    """Blend of the rule-based score with regex signature matching.

    The rule-based model supplies a baseline probability; when the raw
    pattern text matches one of a few known-dangerous signatures, that
    signature's risk score is mixed in.
    """

    def __init__(self):
        """Build the rule model and compile the signature table."""
        # (regex source, risk score) for known dangerous constructs
        signature_table = (
            (r"\([^)]*[+*][^)]*\)[+*]", 0.95),  # Nested quantifiers
            (r"\\[1-9][+*]", 0.85),  # Quantified backreference
            (r"\([^)]*\|[^)]*\)[+*]", 0.75),  # Quantified alternation
            (r"[+*][+*]", 0.65),  # Adjacent quantifiers
        )

        self._rule_model = RuleBasedModel()
        self._feature_names = PatternFeatures.feature_names()
        self._dangerous_signatures: list[tuple[re.Pattern, float]] = [
            (re.compile(expr), score) for expr, score in signature_table
        ]

    def predict(self, features: list[float], pattern: str = "") -> tuple[float, float]:
        """Score a pattern from its features and, optionally, its text.

        Args:
            features: Feature vector handed to the rule-based model.
            pattern: Original pattern text; when empty, signature matching
                is skipped and the rule-based result is returned as is.

        Returns:
            Tuple of (risk_probability, confidence).
        """
        base_prob, base_conf = self._rule_model.predict(features)

        # Collect the risk of every signature that fires on the raw text.
        if pattern:
            hits = [
                score
                for matcher, score in self._dangerous_signatures
                if matcher.search(pattern)
            ]
        else:
            hits = []
        strongest = max(hits, default=0.0)

        if strongest > 0:
            # A signature hit dominates the blend (60/40) and lifts the
            # confidence to at least 0.9.
            blended = 0.6 * strongest + 0.4 * base_prob
            return blended, max(base_conf, 0.9)

        return base_prob, base_conf

    def get_feature_importance(self) -> list[float]:
        """Delegate to the underlying rule-based model."""
        return self._rule_model.get_feature_importance()
561
+
562
+
563
class MLPatternAnalyzer:
    """Machine learning-based regex pattern analyzer.

    Predicts ReDoS vulnerability risk for regex patterns. Until a model is
    trained or loaded, predictions come from the model passed to the
    constructor (an ``EnsembleModel`` by default); afterwards they come
    from the trained predictor.

    Example:
        analyzer = MLPatternAnalyzer()

        # Predict risk
        result = analyzer.predict(r"(a+)+b")
        print(result.risk_level)  # ReDoSRisk.CRITICAL

        # Get detailed features
        features = analyzer.extract_features(r"^[a-z]+$")
        print(features.quantifier_density)

        # Train custom model
        patterns = ["(a+)+", "^[a-z]+$", ...]
        labels = [1, 0, ...]  # 1 = vulnerable, 0 = safe
        analyzer.train(patterns, labels)
    """

    VERSION = "1.0.0"

    # Risk thresholds: a probability at or above the value maps to that level.
    RISK_THRESHOLDS: dict[ReDoSRisk, float] = {
        ReDoSRisk.NONE: 0.1,
        ReDoSRisk.LOW: 0.3,
        ReDoSRisk.MEDIUM: 0.5,
        ReDoSRisk.HIGH: 0.7,
        ReDoSRisk.CRITICAL: 0.85,
    }

    # Attributes copied one by one when converting predictor features into
    # the PatternFeatures dataclass (see _convert_features).
    _LEGACY_FEATURE_FIELDS: tuple[str, ...] = (
        "length",
        "group_count",
        "capture_group_count",
        "non_capture_group_count",
        "max_nesting_depth",
        "alternation_count",
        "plus_count",
        "star_count",
        "question_count",
        "bounded_quantifier_count",
        "unbounded_quantifier_count",
        "lazy_quantifier_count",
        "possessive_quantifier_count",
        "quantifier_density",
        "nested_quantifier_count",
        "adjacent_quantifier_count",
        "quantified_alternation_count",
        "quantified_backreference_count",
        "char_class_count",
        "negated_char_class_count",
        "dot_count",
        "word_boundary_count",
        "lookahead_count",
        "lookbehind_count",
        "negative_lookaround_count",
        "backreference_count",
        "max_backreference_index",
        "start_anchor",
        "end_anchor",
        "anchored",
        "backtracking_potential",
        "estimated_states",
    )

    def __init__(
        self,
        model: MLModelProtocol | None = None,
        feature_extractor: FeatureExtractor | None = None,
    ):
        """Initialize the analyzer.

        Args:
            model: ML model to use (defaults to EnsembleModel)
            feature_extractor: Feature extractor (defaults to FeatureExtractor)
        """
        # Compare against None explicitly so a caller-supplied object that
        # happens to be falsy (e.g. defines __len__) is not silently replaced.
        self.extractor = FeatureExtractor() if feature_extractor is None else feature_extractor
        self._model: Any = EnsembleModel() if model is None else model
        self._trained = False
        # Set by train() / load_model(); None means "use self._model".
        self._ml_predictor: Any = None
        # Metrics object from the most recent train() call, if any.
        self._metrics: Any = None

    def extract_features(self, pattern: str) -> PatternFeatures:
        """Extract features from a pattern.

        Args:
            pattern: Regex pattern

        Returns:
            PatternFeatures object
        """
        return self.extractor.extract(pattern)

    def predict(self, pattern: str) -> MLPredictionResult:
        """Predict ReDoS risk for a pattern.

        Uses the trained ML predictor if one has been trained or loaded,
        otherwise falls back to the model given at construction time.

        Args:
            pattern: Regex pattern to analyze

        Returns:
            MLPredictionResult with prediction details
        """
        # getattr keeps instances created without __init__ working.
        predictor = getattr(self, "_ml_predictor", None)
        if predictor is not None:
            prediction = predictor.predict(pattern)
            # Re-wrap the predictor's result in the MLPredictionResult format
            return MLPredictionResult(
                pattern=prediction.pattern,
                features=self._convert_features(prediction.features),
                risk_probability=prediction.risk_probability,
                risk_level=prediction.risk_level,
                confidence=prediction.confidence,
                contributing_factors=prediction.contributing_factors,
                model_version=prediction.model_version,
            )

        # Fallback: extract features and score them with self._model
        features = self.extractor.extract(pattern)
        feature_vector = features.to_vector()

        # EnsembleModel additionally takes the raw pattern for signature
        # matching; other models only receive the feature vector.
        if isinstance(self._model, EnsembleModel):
            probability, confidence = self._model.predict(feature_vector, pattern)
        else:
            probability, confidence = self._model.predict(feature_vector)

        risk_level = self._probability_to_risk_level(probability)
        contributing_factors = self._get_contributing_factors(features)

        return MLPredictionResult(
            pattern=pattern,
            features=features,
            risk_probability=probability,
            risk_level=risk_level,
            confidence=confidence,
            contributing_factors=contributing_factors,
            model_version=self.VERSION,
        )

    def _convert_features(self, new_features: Any) -> PatternFeatures:
        """Copy a predictor's feature object into a PatternFeatures instance.

        This is for backward compatibility with existing code that
        expects the PatternFeatures dataclass. Every attribute listed in
        ``_LEGACY_FEATURE_FIELDS`` is read from ``new_features`` and passed
        as a keyword argument; a missing attribute raises AttributeError.
        """
        copied = {
            name: getattr(new_features, name) for name in self._LEGACY_FEATURE_FIELDS
        }
        return PatternFeatures(**copied)

    def predict_batch(self, patterns: Sequence[str]) -> list[MLPredictionResult]:
        """Predict risk for multiple patterns.

        Args:
            patterns: Sequence of patterns to analyze

        Returns:
            List of MLPredictionResult objects, in input order
        """
        return [self.predict(pattern) for pattern in patterns]

    def train(
        self,
        patterns: Sequence[str],
        labels: Sequence[int],
        validation_split: float = 0.2,
    ) -> dict[str, float]:
        """Train the model on labeled data.

        Runs the ``TrainingPipeline`` from the ``redos.ml`` subpackage with
        a Random Forest model type and 5 cross-validation folds, then uses
        the resulting model for all subsequent predictions.

        NOTE(review): whether a fallback applies when scikit-learn is not
        installed depends on TrainingPipeline; this method implements none.

        Args:
            patterns: Training patterns
            labels: Labels (1 = vulnerable, 0 = safe)
            validation_split: Fraction of data for validation (passed as
                the pipeline's ``test_split``)

        Returns:
            Training metrics dictionary containing accuracy, precision,
            recall, f1, and sample count.

        Raises:
            ValueError: If patterns and labels have different lengths

        Example:
            >>> analyzer = MLPatternAnalyzer()
            >>> patterns = ["(a+)+", "^[a-z]+$", "(.*)+", "\\d+"]
            >>> labels = [1, 0, 1, 0]  # 1=vulnerable, 0=safe
            >>> metrics = analyzer.train(patterns, labels)
            >>> print(f"Accuracy: {metrics['accuracy']:.2%}")
        """
        if len(patterns) != len(labels):
            raise ValueError("Patterns and labels must have same length")

        # Imported lazily so the ML framework is only needed when training.
        from truthound.validators.security.redos.ml import (
            ReDoSTrainingData,
            TrainingPipeline,
            TrainingConfig,
            ModelType,
            ReDoSMLPredictor,
        )

        training_data = ReDoSTrainingData(
            patterns=list(patterns),
            labels=list(labels),
        )

        config = TrainingConfig(
            model_type=ModelType.RANDOM_FOREST,
            test_split=validation_split,
            cv_folds=5,
            verbose=0,
        )

        pipeline = TrainingPipeline(config=config)
        result = pipeline.train(training_data)

        # Store the trained model for predictions
        self._ml_predictor = ReDoSMLPredictor(model=result.model)
        self._trained = True
        self._metrics = result.metrics

        # Return metrics as dictionary for backward compatibility
        return {
            "accuracy": result.metrics.accuracy,
            "precision": result.metrics.precision,
            "recall": result.metrics.recall,
            "f1": result.metrics.f1_score,
            "samples": float(len(patterns)),
        }

    def save_model(self, path: str | Path) -> None:
        """Save the model state to disk.

        With a trained predictor, its model is written through the
        ``redos.ml.storage.save_model`` helper. Without one, nothing is
        raised: a small JSON document (version, trained flag, model type
        name and risk thresholds) is written instead.

        Args:
            path: Path to save the model (recommended: .pkl extension)

        Example:
            >>> analyzer = MLPatternAnalyzer()
            >>> analyzer.train(patterns, labels)
            >>> analyzer.save_model("redos_model.pkl")
        """
        predictor = getattr(self, "_ml_predictor", None)
        if predictor is not None:
            # Only the trained path needs the ML storage module.
            from truthound.validators.security.redos.ml.storage import save_model

            save_model(predictor.model, path)
            return

        # Fallback for legacy format
        model_data = {
            "version": self.VERSION,
            "trained": self._trained,
            "model_type": type(self._model).__name__,
            "thresholds": {k.name: v for k, v in self.RISK_THRESHOLDS.items()},
        }
        Path(path).write_text(json.dumps(model_data, indent=2), encoding="utf-8")

    def load_model(self, path: str | Path) -> None:
        """Load a model from disk.

        First tries the ``redos.ml.storage.load_model`` helper; on success
        the loaded model is used for all subsequent predictions. If that
        fails, the file is read as the legacy JSON document and only the
        ``trained`` flag is restored. If both attempts fail, the analyzer
        is marked untrained and a warning is logged; no exception escapes.

        NOTE(review): the storage helper presumably unpickles the file, so
        only load model files from trusted sources. Confirm against the
        storage module.

        Args:
            path: Path to the saved model

        Example:
            >>> analyzer = MLPatternAnalyzer()
            >>> analyzer.load_model("redos_model.pkl")
            >>> result = analyzer.predict("(a+)+b")
        """
        import logging

        from truthound.validators.security.redos.ml import ReDoSMLPredictor
        from truthound.validators.security.redos.ml.storage import load_model

        log = logging.getLogger(__name__)
        path = Path(path)

        # Try loading as new format first
        try:
            model = load_model(path)
            self._ml_predictor = ReDoSMLPredictor(model=model)
            self._trained = True
            return
        except Exception as exc:  # best-effort: any loader failure falls through
            log.debug("Could not load %s as a trained model: %s", path, exc)

        # Fallback to legacy JSON format
        try:
            model_data = json.loads(path.read_text(encoding="utf-8"))
            self._trained = model_data.get("trained", False)
        except Exception as exc:  # best-effort: leave the analyzer untrained
            log.warning("Could not load model state from %s: %s", path, exc)
            self._trained = False

    def _probability_to_risk_level(self, probability: float) -> ReDoSRisk:
        """Convert probability to the highest risk level whose threshold it meets."""
        # Checked from most to least severe; the NONE threshold is never
        # consulted because NONE is the catch-all result.
        descending = (
            ReDoSRisk.CRITICAL,
            ReDoSRisk.HIGH,
            ReDoSRisk.MEDIUM,
            ReDoSRisk.LOW,
        )
        for level in descending:
            if probability >= self.RISK_THRESHOLDS[level]:
                return level
        return ReDoSRisk.NONE

    def _get_contributing_factors(
        self,
        features: PatternFeatures,
    ) -> list[tuple[str, float]]:
        """Get features that contribute most to the risk prediction.

        A feature's contribution is its importance (from the model)
        multiplied by its value; only positive contributions are kept.

        Args:
            features: Extracted pattern features

        Returns:
            Up to five (feature_name, contribution) tuples, sorted by
            contribution in descending order
        """
        feature_importance = self._model.get_feature_importance()
        feature_values = features.to_vector()
        feature_names = PatternFeatures.feature_names()

        contributions: list[tuple[str, float]] = [
            (name, importance * value)
            for name, importance, value in zip(
                feature_names, feature_importance, feature_values
            )
            if importance * value > 0
        ]

        # Top 5 contributors, largest first
        return sorted(contributions, key=lambda item: item[1], reverse=True)[:5]
910
+
911
+
912
+ # ============================================================================
913
+ # Convenience functions
914
+ # ============================================================================
915
+
916
+
917
def predict_redos_risk(
    pattern: str,
    analyzer: MLPatternAnalyzer | None = None,
) -> MLPredictionResult:
    """Run a one-off ML-based ReDoS risk prediction for a regex pattern.

    Args:
        pattern: Regex pattern to analyze
        analyzer: Analyzer to use; a fresh default ``MLPatternAnalyzer``
            is created when omitted

    Returns:
        MLPredictionResult with prediction details

    Example:
        result = predict_redos_risk(r"(a+)+b")
        print(result.risk_level)  # ReDoSRisk.CRITICAL
        print(result.risk_probability)  # ~0.95
    """
    active = MLPatternAnalyzer() if analyzer is None else analyzer
    return active.predict(pattern)