truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877):
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,1000 @@
1
+ """ML model implementations for ReDoS prediction.
2
+
3
+ This module provides concrete implementations of ReDoS prediction models,
4
+ including rule-based baseline, scikit-learn models, and ensemble methods.
5
+
6
+ Available Models:
7
+ - RuleBasedReDoSModel: Deterministic rule-based classifier (no ML deps)
8
+ - RandomForestReDoSModel: Random Forest classifier
9
+ - GradientBoostingReDoSModel: Gradient Boosting classifier
10
+ - LogisticRegressionReDoSModel: Logistic Regression classifier
11
+ - EnsembleReDoSModel: Combines multiple models for robust predictions
12
+
13
+ Example:
14
+ >>> from truthound.validators.security.redos.ml.models import (
15
+ ... RandomForestReDoSModel,
16
+ ... create_model,
17
+ ... )
18
+ >>> model = create_model("random_forest")
19
+ >>> model.train(training_data)
20
+ >>> probability, confidence = model.predict(feature_vector)
21
+ """
22
+
23
+ from __future__ import annotations
24
+
25
+ import logging
26
+ import math
27
+ import time
28
+ from pathlib import Path
29
+ from typing import Any, Dict, List, Optional, Tuple, Type
30
+
31
+ from truthound.validators.security.redos.ml.base import (
32
+ BaseReDoSModel,
33
+ ModelConfig,
34
+ ModelType,
35
+ PatternFeatures,
36
+ ReDoSModelMetrics,
37
+ ReDoSTrainingData,
38
+ )
39
+
40
+
41
+ logger = logging.getLogger(__name__)
42
+
43
+
44
+ # =============================================================================
45
+ # Rule-Based Model (No ML Dependencies)
46
+ # =============================================================================
47
+
48
+
49
class RuleBasedReDoSModel(BaseReDoSModel):
    """Rule-based model for ReDoS risk prediction.

    This model uses hand-crafted rules based on known ReDoS patterns
    to estimate risk. It serves as a baseline and fallback when
    ML models are not available or not trained.

    The model assigns weights to various pattern features and combines
    them using a logistic function to produce a probability.

    Feature weights are derived from analysis of known vulnerable patterns
    and security research on regex backtracking behavior.

    Attributes:
        FEATURE_WEIGHTS: Dictionary mapping feature names to weights
        BIAS: Bias term for logistic function
    """

    name = "rule_based"
    version = "1.0.0"

    # Hand-assigned weights derived from analysis of known vulnerable
    # patterns. Features missing from this table contribute nothing.
    FEATURE_WEIGHTS: Dict[str, float] = {
        "nested_quantifier_count": 5.0,
        "quantified_backreference_count": 4.0,
        "quantified_alternation_count": 3.5,
        "adjacent_quantifier_count": 2.5,
        "unbounded_quantifier_count": 1.5,
        "max_nesting_depth": 0.8,
        "star_count": 0.5,
        "plus_count": 0.5,
        "alternation_count": 0.3,
        "quantifier_density": 2.0,
        "backtracking_potential": 0.1,
    }

    BIAS = -2.0

    def __init__(self, config: ModelConfig | None = None):
        """Initialize the rule-based model.

        Args:
            config: Optional model configuration, forwarded to the base class.
        """
        super().__init__(config)
        self._trained = True  # Always ready: the rules need no fitting

    def predict(self, features: List[float]) -> Tuple[float, float]:
        """Predict risk probability using rules.

        Args:
            features: Feature vector, paired positionally with
                ``self._feature_names``.

        Returns:
            Tuple of (risk_probability, confidence)
        """
        feature_dict = dict(zip(self._feature_names, features))

        # Weighted sum over the features that are both known to the rule
        # table and present in the vector. Accumulated term by term so the
        # floating-point result does not depend on the summation helper.
        weighted_sum = self.BIAS
        for feature_name, weight in self.FEATURE_WEIGHTS.items():
            if feature_name in feature_dict:
                weighted_sum += feature_dict[feature_name] * weight

        # Logistic function. math.exp raises OverflowError once its argument
        # exceeds roughly 709, i.e. for a strongly negative score; the
        # mathematical limit there is a probability of zero.
        try:
            probability = 1.0 / (1.0 + math.exp(-weighted_sum))
        except OverflowError:
            probability = 0.0

        # Confidence grows with the distance of the probability from 0.5:
        # 0.0 at the decision boundary, 1.0 at either extreme.
        confidence = abs(2 * probability - 1)

        return probability, confidence

    def predict_batch(
        self, features: List[List[float]]
    ) -> List[Tuple[float, float]]:
        """Predict for multiple samples.

        Args:
            features: One feature vector per sample.

        Returns:
            One (risk_probability, confidence) tuple per input vector,
            in input order.
        """
        return [self.predict(f) for f in features]

    def train(self, data: ReDoSTrainingData) -> ReDoSModelMetrics:
        """Evaluate the fixed rules on ``data``; no fitting takes place.

        The rule weights are not modified. The supplied samples are only
        scored so that reference metrics can be reported.

        Args:
            data: Training data container. When ``data.features`` is None,
                feature vectors are extracted from ``data.patterns``.

        Returns:
            Metrics of the rules on the supplied data (also stored on
            ``self._metrics``).
        """
        if data.features is None:
            from truthound.validators.security.redos.ml.features import (
                PatternFeatureExtractor,
            )

            extractor = PatternFeatureExtractor()
            features = [extractor.extract(p).to_vector() for p in data.patterns]
        else:
            features = data.features

        # A sample is labelled vulnerable when its probability reaches 0.5.
        predicted_labels = [1 if self.predict(f)[0] >= 0.5 else 0 for f in features]

        # Confusion counts in a single pass. Label values other than 0/1
        # are not counted in any cell.
        tp = tn = fp = fn = 0
        for predicted, actual in zip(predicted_labels, data.labels):
            if predicted == 1 and actual == 1:
                tp += 1
            elif predicted == 0 and actual == 0:
                tn += 1
            elif predicted == 1 and actual == 0:
                fp += 1
            elif predicted == 0 and actual == 1:
                fn += 1

        # max(..., 1) / max(..., 1e-10) guard against division by zero on
        # empty or single-class data.
        accuracy = (tp + tn) / max(len(data), 1)
        precision = tp / max(tp + fp, 1)
        recall = tp / max(tp + fn, 1)
        f1 = 2 * precision * recall / max(precision + recall, 1e-10)
        specificity = tn / max(tn + fp, 1)

        self._metrics = ReDoSModelMetrics(
            accuracy=accuracy,
            precision=precision,
            recall=recall,
            f1_score=f1,
            specificity=specificity,
            confusion_matrix=[[tn, fp], [fn, tp]],
            training_samples=len(data),
        )

        return self._metrics

    def get_feature_importance(self) -> List[float]:
        """Get feature importance based on rule weights.

        Returns:
            The rule weight of each name in ``self._feature_names``
            (0.0 for names without a rule), in the same order.
        """
        return [self.FEATURE_WEIGHTS.get(name, 0.0) for name in self._feature_names]

    def _save_model_data(self) -> Dict[str, Any]:
        """Save rule weights.

        Returns:
            Dictionary with a copy of the weights under ``"weights"`` and
            the bias under ``"bias"``.
        """
        # Copy so that callers mutating the saved payload cannot alter the
        # (possibly class-level, shared) weight table.
        return {"weights": dict(self.FEATURE_WEIGHTS), "bias": self.BIAS}

    def _load_model_data(self, data: Dict[str, Any]) -> None:
        """Load rule weights.

        Args:
            data: Payload as produced by ``_save_model_data``. Missing keys
                leave the corresponding current value untouched.
        """
        # Assignments create instance attributes that shadow the class-level
        # defaults, so other instances are unaffected.
        if "weights" in data:
            self.FEATURE_WEIGHTS = dict(data["weights"])
        if "bias" in data:
            self.BIAS = data["bias"]
181
+
182
+
183
+ # =============================================================================
184
+ # Scikit-Learn Based Models
185
+ # =============================================================================
186
+
187
+
188
+ def _check_sklearn_available() -> bool:
189
+ """Check if scikit-learn is available."""
190
+ try:
191
+ import sklearn
192
+
193
+ return True
194
+ except ImportError:
195
+ return False
196
+
197
+
198
class RandomForestReDoSModel(BaseReDoSModel):
    """Random Forest classifier for ReDoS prediction.

    This model uses scikit-learn's RandomForestClassifier for robust
    predictions with built-in feature importance scores.

    Random Forest provides:
    - Robust predictions resistant to outliers
    - Built-in feature importance
    - Good performance with default hyperparameters
    - Parallel training capability

    When scikit-learn is not installed, or the model has not been trained,
    predictions are delegated to RuleBasedReDoSModel.
    """

    name = "random_forest"
    version = "1.0.0"

    def __init__(self, config: ModelConfig | None = None):
        """Initialize the Random Forest model.

        Args:
            config: Optional model configuration, forwarded to the base class.
        """
        super().__init__(config)
        self._model: Any = None
        # Set by train() when scikit-learn is unavailable; None otherwise.
        self._rule_fallback: RuleBasedReDoSModel | None = None
        self._sklearn_available = _check_sklearn_available()

    @staticmethod
    def _score_from_proba(proba: Any) -> Tuple[float, float]:
        """Convert one predict_proba row into (probability, confidence)."""
        # proba is [P(safe), P(vulnerable)]; a single-column row is used
        # as-is.
        probability = float(proba[1]) if len(proba) > 1 else float(proba[0])
        confidence = abs(probability - 0.5) * 2  # Confidence from certainty
        return probability, confidence

    def predict(self, features: List[float]) -> Tuple[float, float]:
        """Predict risk probability.

        Falls back to rule-based model if not trained or sklearn unavailable.

        Args:
            features: Feature vector for a single sample.

        Returns:
            Tuple of (risk_probability, confidence)
        """
        # getattr keeps this safe for instances created without __init__.
        fallback = getattr(self, "_rule_fallback", None)
        if fallback is not None:
            return fallback.predict(features)

        if not self._trained or self._model is None:
            return RuleBasedReDoSModel(self._config).predict(features)

        import numpy as np

        proba = self._model.predict_proba(np.array([features]))[0]
        return self._score_from_proba(proba)

    def predict_batch(
        self, features: List[List[float]]
    ) -> List[Tuple[float, float]]:
        """Predict for multiple samples.

        Args:
            features: One feature vector per sample.

        Returns:
            One (risk_probability, confidence) tuple per input vector,
            in input order.
        """
        fallback = getattr(self, "_rule_fallback", None)
        if fallback is not None:
            return fallback.predict_batch(features)

        if not self._trained or self._model is None:
            return RuleBasedReDoSModel(self._config).predict_batch(features)

        import numpy as np

        probas = self._model.predict_proba(np.array(features))
        return [self._score_from_proba(proba) for proba in probas]

    def train(self, data: ReDoSTrainingData) -> ReDoSModelMetrics:
        """Train the Random Forest model.

        Without scikit-learn, a RuleBasedReDoSModel is evaluated on ``data``
        instead and used for all subsequent predictions.

        Args:
            data: Training data container. When ``data.features`` is None,
                feature vectors are extracted from ``data.patterns``.

        Returns:
            Training metrics (also stored on ``self._metrics``).
        """
        if not self._sklearn_available:
            logger.warning("scikit-learn not available, using rule-based fallback")
            # Use rule-based model internally but mark as trained
            self._rule_fallback = RuleBasedReDoSModel(self._config)
            metrics = self._rule_fallback.train(data)
            self._trained = True
            self._metrics = metrics
            return metrics

        import numpy as np
        from sklearn.ensemble import RandomForestClassifier
        from sklearn.metrics import (
            accuracy_score,
            confusion_matrix,
            f1_score,
            precision_score,
            recall_score,
            roc_auc_score,
        )
        from sklearn.model_selection import cross_val_score, train_test_split

        start_time = time.time()

        # Prepare features
        if data.features is None:
            from truthound.validators.security.redos.ml.features import (
                PatternFeatureExtractor,
            )

            extractor = PatternFeatureExtractor()
            X = np.array([extractor.extract(p).to_vector() for p in data.patterns])
        else:
            X = np.array(data.features)

        y = np.array(data.labels)

        # Split data; stratification is only possible with more than one class.
        X_train, X_test, y_train, y_test = train_test_split(
            X,
            y,
            test_size=self._config.validation_split,
            random_state=self._config.random_state,
            stratify=y if len(set(y)) > 1 else None,
        )

        # Create and train model
        self._model = RandomForestClassifier(
            n_estimators=self._config.n_estimators,
            max_depth=self._config.max_depth,
            min_samples_split=self._config.min_samples_split,
            min_samples_leaf=self._config.min_samples_leaf,
            random_state=self._config.random_state,
            n_jobs=self._config.n_jobs,
            class_weight=self._config.class_weight,
        )

        self._model.fit(X_train, y_train)

        # Evaluate on the held-out split
        y_pred = self._model.predict(X_test)
        y_proba = self._model.predict_proba(X_test)

        # Cross-validation. Each fold needs samples of every class, so the
        # fold count is capped by the size of the rarest class (not by the
        # number of classes). Fewer than two folds is not a valid k-fold
        # setup, in which case cross-validation is skipped.
        _, class_counts = np.unique(y, return_counts=True)
        n_folds = min(self._config.cross_validation_folds, int(class_counts.min()))
        if n_folds >= 2:
            cv_scores = cross_val_score(
                self._model,
                X,
                y,
                cv=n_folds,
                n_jobs=self._config.n_jobs,
            ).tolist()
        else:
            logger.warning(
                "Too few samples per class for cross-validation; skipping"
            )
            cv_scores = []

        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, zero_division=0)
        recall = recall_score(y_test, y_pred, zero_division=0)
        f1 = f1_score(y_test, y_pred, zero_division=0)
        cm = confusion_matrix(y_test, y_pred)
        # Row 0 of the matrix holds the true-negative / false-positive counts;
        # a 1x1 matrix (single class seen) yields no specificity.
        specificity = cm[0, 0] / max(cm[0].sum(), 1) if len(cm) > 1 else 0.0

        # AUC-ROC needs both classes in the test split; best-effort otherwise.
        try:
            auc = roc_auc_score(y_test, y_proba[:, 1]) if len(set(y_test)) > 1 else None
        except Exception:
            auc = None

        # Feature importances keyed by feature name
        importances = dict(
            zip(self._feature_names, self._model.feature_importances_.tolist())
        )

        training_time = time.time() - start_time
        self._trained = True
        # A fitted sklearn model supersedes any earlier rule-based fallback.
        self._rule_fallback = None

        self._metrics = ReDoSModelMetrics(
            accuracy=accuracy,
            precision=precision,
            recall=recall,
            f1_score=f1,
            specificity=specificity,
            auc_roc=auc,
            confusion_matrix=cm.tolist(),
            feature_importances=importances,
            cross_val_scores=cv_scores,
            training_samples=len(data),
            training_time_seconds=training_time,
        )

        logger.info(f"Random Forest training complete. Accuracy: {accuracy:.2%}, F1: {f1:.2%}")

        return self._metrics

    def get_feature_importance(self) -> List[float]:
        """Get feature importance from trained model.

        Returns:
            The fitted model's ``feature_importances_`` as a list, or a list
            of zeros (one per feature name) when no fitted model is present.
        """
        if self._model is not None and hasattr(self._model, "feature_importances_"):
            return self._model.feature_importances_.tolist()
        return [0.0] * len(self._feature_names)

    def _save_model_data(self) -> Dict[str, Any]:
        """Save the sklearn model.

        Returns:
            Dictionary holding the estimator (or None) under
            ``"sklearn_model"``.
        """
        return {"sklearn_model": self._model}

    def _load_model_data(self, data: Dict[str, Any]) -> None:
        """Load the sklearn model.

        Args:
            data: Payload as produced by ``_save_model_data``. The model is
                marked trained only when an estimator is present.
        """
        self._model = data.get("sklearn_model")
        if self._model is not None:
            self._trained = True
            # Ensure the loaded estimator is used rather than a fallback left
            # over from an earlier train() call without scikit-learn.
            self._rule_fallback = None
403
+
404
+
405
class GradientBoostingReDoSModel(BaseReDoSModel):
    """ReDoS risk classifier backed by gradient-boosted decision trees.

    Training fits scikit-learn's ``GradientBoostingClassifier`` on the
    supplied feature vectors (or on vectors extracted from the raw
    patterns). Whenever scikit-learn is unavailable, or no estimator has
    been trained/loaded yet, predictions are delegated to
    ``RuleBasedReDoSModel`` instead.
    """

    name = "gradient_boosting"
    version = "1.0.0"

    def __init__(self, config: ModelConfig | None = None):
        """Set up an untrained model and record whether sklearn is importable."""
        super().__init__(config)
        self._model: Any = None
        self._sklearn_available = _check_sklearn_available()

    @staticmethod
    def _as_prediction(class_probs: Any) -> Tuple[float, float]:
        """Turn one ``predict_proba`` row into ``(probability, confidence)``.

        The second column is used as the risk probability when it exists,
        otherwise the only column is used. Confidence is the distance from
        0.5 rescaled to the range [0, 1].
        """
        if len(class_probs) > 1:
            probability = float(class_probs[1])
        else:
            probability = float(class_probs[0])
        return probability, abs(probability - 0.5) * 2

    def predict(self, features: List[float]) -> Tuple[float, float]:
        """Return ``(risk_probability, confidence)`` for one feature vector."""
        # A rule model created by train() (sklearn missing) takes priority.
        rule_model = getattr(self, "_rule_fallback", None)
        if rule_model is not None:
            return rule_model.predict(features)

        # Nothing trained or loaded yet: answer with a fresh rule-based model.
        if not (self._trained and self._model is not None):
            return RuleBasedReDoSModel(self._config).predict(features)

        import numpy as np

        first_row = self._model.predict_proba(np.array([features]))[0]
        return self._as_prediction(first_row)

    def predict_batch(
        self, features: List[List[float]]
    ) -> List[Tuple[float, float]]:
        """Return ``(risk_probability, confidence)`` for each feature vector."""
        # A rule model created by train() (sklearn missing) takes priority.
        rule_model = getattr(self, "_rule_fallback", None)
        if rule_model is not None:
            return rule_model.predict_batch(features)

        # Nothing trained or loaded yet: answer with a fresh rule-based model.
        if not (self._trained and self._model is not None):
            return RuleBasedReDoSModel(self._config).predict_batch(features)

        import numpy as np

        rows = self._model.predict_proba(np.array(features))
        return [self._as_prediction(row) for row in rows]

    def train(self, data: ReDoSTrainingData) -> ReDoSModelMetrics:
        """Fit the classifier on ``data`` and return evaluation metrics.

        Without scikit-learn, a ``RuleBasedReDoSModel`` is trained instead,
        stored as the fallback, and its metrics are returned.
        """
        if not self._sklearn_available:
            logger.warning("scikit-learn not available, using rule-based fallback")
            self._rule_fallback = RuleBasedReDoSModel(self._config)
            fallback_metrics = self._rule_fallback.train(data)
            self._trained = True
            self._metrics = fallback_metrics
            return fallback_metrics

        import numpy as np
        from sklearn.ensemble import GradientBoostingClassifier
        from sklearn.metrics import (
            accuracy_score,
            confusion_matrix,
            f1_score,
            precision_score,
            recall_score,
            roc_auc_score,
        )
        from sklearn.model_selection import cross_val_score, train_test_split

        started_at = time.time()
        cfg = self._config

        # Build the design matrix: use precomputed vectors when supplied,
        # otherwise extract them from the raw patterns.
        if data.features is not None:
            X = np.array(data.features)
        else:
            from truthound.validators.security.redos.ml.features import (
                PatternFeatureExtractor,
            )

            extractor = PatternFeatureExtractor()
            X = np.array([extractor.extract(p).to_vector() for p in data.patterns])

        y = np.array(data.labels)
        n_classes = len(set(y))

        # Hold out a validation split; stratify only when more than one
        # distinct label is present.
        X_train, X_test, y_train, y_test = train_test_split(
            X,
            y,
            test_size=cfg.validation_split,
            random_state=cfg.random_state,
            stratify=y if n_classes > 1 else None,
        )

        # Tree depth is capped at 5 regardless of the configured value.
        classifier = GradientBoostingClassifier(
            n_estimators=cfg.n_estimators,
            max_depth=min(cfg.max_depth, 5),
            min_samples_split=cfg.min_samples_split,
            min_samples_leaf=cfg.min_samples_leaf,
            learning_rate=cfg.learning_rate,
            random_state=cfg.random_state,
        )
        self._model = classifier
        classifier.fit(X_train, y_train)

        y_pred = classifier.predict(X_test)
        y_proba = classifier.predict_proba(X_test)

        # NOTE(review): the fold count is capped by the number of distinct
        # labels, not by the per-class sample count - confirm this is intended.
        cv_scores = cross_val_score(
            classifier,
            X,
            y,
            cv=min(cfg.cross_validation_folds, n_classes),
        )

        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, zero_division=0)
        recall = recall_score(y_test, y_pred, zero_division=0)
        f1 = f1_score(y_test, y_pred, zero_division=0)
        cm = confusion_matrix(y_test, y_pred)

        # Specificity = first-row diagonal over first-row total of the
        # confusion matrix; the max() guards against division by zero.
        if len(cm) > 1:
            specificity = cm[0, 0] / max(cm[0].sum(), 1)
        else:
            specificity = 0.0

        # AUC is only computed when the held-out labels contain more than
        # one distinct value; any failure leaves it unset.
        try:
            if len(set(y_test)) > 1:
                auc = roc_auc_score(y_test, y_proba[:, 1])
            else:
                auc = None
        except Exception:
            auc = None

        importances = dict(
            zip(self._feature_names, classifier.feature_importances_.tolist())
        )

        elapsed = time.time() - started_at
        self._trained = True

        self._metrics = ReDoSModelMetrics(
            accuracy=accuracy,
            precision=precision,
            recall=recall,
            f1_score=f1,
            specificity=specificity,
            auc_roc=auc,
            confusion_matrix=cm.tolist(),
            feature_importances=importances,
            cross_val_scores=cv_scores.tolist(),
            training_samples=len(data),
            training_time_seconds=elapsed,
        )

        logger.info(f"Gradient Boosting training complete. Accuracy: {accuracy:.2%}, F1: {f1:.2%}")

        return self._metrics

    def get_feature_importance(self) -> List[float]:
        """Return the estimator's feature importances, or zeros if unavailable."""
        estimator = self._model
        if estimator is None or not hasattr(estimator, "feature_importances_"):
            return [0.0] * len(self._feature_names)
        return estimator.feature_importances_.tolist()

    def _save_model_data(self) -> Dict[str, Any]:
        """Return the state to persist: the estimator under ``"sklearn_model"``."""
        return {"sklearn_model": self._model}

    def _load_model_data(self, data: Dict[str, Any]) -> None:
        """Restore the estimator; mark the model trained when one is present."""
        restored = data.get("sklearn_model")
        self._model = restored
        if restored is None:
            return
        self._trained = True
594
+
595
+
596
class LogisticRegressionReDoSModel(BaseReDoSModel):
    """Logistic Regression classifier for ReDoS prediction.

    Training standardises the features with a ``StandardScaler`` and fits
    scikit-learn's ``LogisticRegression`` on the scaled values. The fitted
    scaler is kept next to the estimator and is applied again at inference
    time, so predictions see features on the same scale the model was
    trained on.

    When scikit-learn is unavailable, or no estimator has been trained or
    loaded yet, predictions are delegated to ``RuleBasedReDoSModel``.
    """

    name = "logistic_regression"
    version = "1.0.0"

    def __init__(self, config: ModelConfig | None = None):
        """Initialize the Logistic Regression model."""
        super().__init__(config)
        self._model: Any = None
        # Fitted by train() or restored by _load_model_data(); None means
        # "no scaling available" (e.g. state saved without a scaler).
        self._scaler: Any = None
        self._sklearn_available = _check_sklearn_available()

    def _scale(self, X: Any) -> Any:
        """Apply the stored scaler to ``X``; return ``X`` unchanged if none."""
        scaler = getattr(self, "_scaler", None)
        if scaler is None:
            return X
        return scaler.transform(X)

    @staticmethod
    def _to_prediction(proba: Any) -> Tuple[float, float]:
        """Convert one ``predict_proba`` row to ``(probability, confidence)``.

        The second column is used as the risk probability when present,
        otherwise the only column. Confidence is the distance from 0.5
        rescaled to [0, 1].
        """
        probability = float(proba[1]) if len(proba) > 1 else float(proba[0])
        confidence = abs(probability - 0.5) * 2
        return probability, confidence

    def predict(self, features: List[float]) -> Tuple[float, float]:
        """Predict risk probability.

        Args:
            features: Raw (unscaled) feature vector for one pattern.

        Returns:
            Tuple of (risk_probability, confidence).
        """
        # Use rule fallback if sklearn wasn't available at training time
        rule_fallback = getattr(self, "_rule_fallback", None)
        if rule_fallback is not None:
            return rule_fallback.predict(features)

        if not self._trained or self._model is None:
            return RuleBasedReDoSModel(self._config).predict(features)

        import numpy as np

        # The estimator was fitted on standardised features, so the same
        # transform must be applied before asking it for probabilities.
        X = self._scale(np.array([features]))
        return self._to_prediction(self._model.predict_proba(X)[0])

    def predict_batch(
        self, features: List[List[float]]
    ) -> List[Tuple[float, float]]:
        """Predict for multiple samples.

        Args:
            features: Raw (unscaled) feature vectors, one per pattern.

        Returns:
            One (risk_probability, confidence) tuple per input vector.
        """
        # Use rule fallback if sklearn wasn't available at training time
        rule_fallback = getattr(self, "_rule_fallback", None)
        if rule_fallback is not None:
            return rule_fallback.predict_batch(features)

        if not self._trained or self._model is None:
            fallback = RuleBasedReDoSModel(self._config)
            return fallback.predict_batch(features)

        # An empty batch has no rows to scale or score.
        if len(features) == 0:
            return []

        import numpy as np

        # Same standardisation as in training (see predict()).
        X = self._scale(np.array(features))
        probas = self._model.predict_proba(X)
        return [self._to_prediction(proba) for proba in probas]

    def train(self, data: ReDoSTrainingData) -> ReDoSModelMetrics:
        """Train the Logistic Regression model.

        Args:
            data: Training data providing labels and either precomputed
                feature vectors or raw patterns to extract them from.

        Returns:
            Metrics computed on the held-out validation split. Without
            scikit-learn, the metrics of the rule-based fallback instead.
        """
        if not self._sklearn_available:
            logger.warning("scikit-learn not available, using rule-based fallback")
            self._rule_fallback = RuleBasedReDoSModel(self._config)
            metrics = self._rule_fallback.train(data)
            self._trained = True
            self._metrics = metrics
            return metrics

        import numpy as np
        from sklearn.linear_model import LogisticRegression
        from sklearn.metrics import (
            accuracy_score,
            confusion_matrix,
            f1_score,
            precision_score,
            recall_score,
            roc_auc_score,
        )
        from sklearn.model_selection import cross_val_score, train_test_split
        from sklearn.preprocessing import StandardScaler

        start_time = time.time()

        # Prepare features
        if data.features is None:
            from truthound.validators.security.redos.ml.features import (
                PatternFeatureExtractor,
            )

            extractor = PatternFeatureExtractor()
            X = np.array([extractor.extract(p).to_vector() for p in data.patterns])
        else:
            X = np.array(data.features)

        y = np.array(data.labels)

        # Scale features for logistic regression.
        # NOTE(review): the scaler is fitted on the full dataset before the
        # split, so validation metrics see statistics from held-out rows.
        self._scaler = StandardScaler()
        X_scaled = self._scaler.fit_transform(X)

        # Split data
        X_train, X_test, y_train, y_test = train_test_split(
            X_scaled,
            y,
            test_size=self._config.validation_split,
            random_state=self._config.random_state,
            stratify=y if len(set(y)) > 1 else None,
        )

        # Create and train model
        self._model = LogisticRegression(
            random_state=self._config.random_state,
            class_weight=self._config.class_weight,
            max_iter=1000,
            n_jobs=self._config.n_jobs,
        )

        self._model.fit(X_train, y_train)

        # Evaluate
        y_pred = self._model.predict(X_test)
        y_proba = self._model.predict_proba(X_test)

        # Cross-validation
        # NOTE(review): the fold count is capped by the number of distinct
        # labels, not by the per-class sample count - confirm this is intended.
        cv_scores = cross_val_score(
            self._model,
            X_scaled,
            y,
            cv=min(self._config.cross_validation_folds, len(set(y))),
        )

        # Calculate metrics
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, zero_division=0)
        recall = recall_score(y_test, y_pred, zero_division=0)
        f1 = f1_score(y_test, y_pred, zero_division=0)
        cm = confusion_matrix(y_test, y_pred)
        # First-row diagonal over first-row total; max() avoids dividing by 0.
        specificity = cm[0, 0] / max(cm[0].sum(), 1) if len(cm) > 1 else 0.0

        # AUC needs more than one distinct held-out label; any failure
        # leaves it unset rather than aborting training.
        try:
            auc = roc_auc_score(y_test, y_proba[:, 1]) if len(set(y_test)) > 1 else None
        except Exception:
            auc = None

        # Feature importance from absolute coefficient values
        coeffs = self._model.coef_[0] if len(self._model.coef_.shape) > 1 else self._model.coef_
        importances = dict(zip(self._feature_names, np.abs(coeffs).tolist()))

        training_time = time.time() - start_time
        self._trained = True

        self._metrics = ReDoSModelMetrics(
            accuracy=accuracy,
            precision=precision,
            recall=recall,
            f1_score=f1,
            specificity=specificity,
            auc_roc=auc,
            confusion_matrix=cm.tolist(),
            feature_importances=importances,
            cross_val_scores=cv_scores.tolist(),
            training_samples=len(data),
            training_time_seconds=training_time,
        )

        logger.info(f"Logistic Regression training complete. Accuracy: {accuracy:.2%}, F1: {f1:.2%}")

        return self._metrics

    def get_feature_importance(self) -> List[float]:
        """Get feature importance as absolute coefficient values.

        Returns zeros (one per feature name) when no fitted estimator with
        coefficients is available.
        """
        if self._model is not None and hasattr(self._model, "coef_"):
            import numpy as np

            coeffs = self._model.coef_[0] if len(self._model.coef_.shape) > 1 else self._model.coef_
            return np.abs(coeffs).tolist()
        return [0.0] * len(self._feature_names)

    def _save_model_data(self) -> Dict[str, Any]:
        """Save the sklearn model and scaler."""
        return {
            "sklearn_model": self._model,
            "scaler": getattr(self, "_scaler", None),
        }

    def _load_model_data(self, data: Dict[str, Any]) -> None:
        """Load the sklearn model and scaler.

        The scaler may be absent from ``data``; inference then runs on
        unscaled features.
        """
        self._model = data.get("sklearn_model")
        self._scaler = data.get("scaler")
        if self._model is not None:
            self._trained = True
789
+
790
+
791
class EnsembleReDoSModel(BaseReDoSModel):
    """Blend of rule heuristics, an optional ML model and regex signatures.

    Each prediction is a fixed-weight combination of up to three sources:

    - the ``RuleBasedReDoSModel`` score (always used),
    - a trained ML model's score (only when one is attached and trained),
    - the highest risk among ``DANGEROUS_SIGNATURES`` found in the pattern
      text (only when a pattern string is supplied).

    The ensemble is usable immediately after construction because the
    rule-based component is created in ``__init__``.
    """

    name = "ensemble"
    version = "1.0.0"

    # (regex source, risk score) pairs searched in the raw pattern text.
    DANGEROUS_SIGNATURES: List[Tuple[str, float]] = [
        (r"\([^)]*[+*][^)]*\)[+*]", 0.95),  # Nested quantifiers
        (r"\\[1-9][+*]", 0.85),  # Quantified backreference
        (r"\([^)]*\|[^)]*\)[+*]", 0.75),  # Quantified alternation
        (r"[+*][+*]", 0.65),  # Adjacent quantifiers
        (r"\([^)]*\)\{[\d,]+\}\{", 0.70),  # Nested bounded quantifiers
    ]

    def __init__(
        self,
        config: ModelConfig | None = None,
        ml_model: BaseReDoSModel | None = None,
    ):
        """Build the ensemble.

        Args:
            config: Model configuration, also passed to the rule model.
            ml_model: Optional ML model to blend in once it is trained.
        """
        import re

        super().__init__(config)
        self._rule_model = RuleBasedReDoSModel(config)
        self._ml_model = ml_model
        # Marked trained up front: the rule model is created above.
        self._trained = True

        # Compile each signature once per instance.
        self._compiled_signatures = [
            (re.compile(source), risk)
            for source, risk in self.DANGEROUS_SIGNATURES
        ]

    def predict(
        self, features: List[float], pattern: str = ""
    ) -> Tuple[float, float]:
        """Combine all available sources into one prediction.

        Args:
            features: Feature vector for the pattern.
            pattern: Raw pattern text; signature matching is skipped when
                it is empty.

        Returns:
            Tuple of (risk_probability, confidence).
        """
        rule_prob, rule_conf = self._rule_model.predict(features)

        ml_ready = self._ml_model is not None and self._ml_model.is_trained
        if ml_ready:
            ml_prob, ml_conf = self._ml_model.predict(features)
        else:
            ml_prob, ml_conf = 0.0, 0.0

        # Highest risk among matching signatures; 0.0 means "no match".
        sig_prob = 0.0
        if pattern:
            matched_risks = [
                risk
                for regex, risk in self._compiled_signatures
                if regex.search(pattern)
            ]
            sig_prob = max([sig_prob] + matched_risks)

        signature_hit = sig_prob > 0

        if ml_ready and signature_hit:
            # All three sources available.
            return (
                0.4 * ml_prob + 0.35 * sig_prob + 0.25 * rule_prob,
                max(ml_conf, 0.9),
            )
        if ml_ready:
            # ML and rules only.
            return 0.6 * ml_prob + 0.4 * rule_prob, (ml_conf + rule_conf) / 2
        if signature_hit:
            # Signatures and rules only.
            return 0.6 * sig_prob + 0.4 * rule_prob, max(rule_conf, 0.9)
        # Rules alone.
        return rule_prob, rule_conf

    def predict_batch(
        self, features: List[List[float]]
    ) -> List[Tuple[float, float]]:
        """Predict each vector in turn; no pattern text, so no signatures."""
        return list(map(self.predict, features))

    def predict_with_pattern(
        self, features: List[float], pattern: str
    ) -> Tuple[float, float]:
        """Predict for one vector, including signature matching on ``pattern``."""
        return self.predict(features, pattern)

    def train(self, data: ReDoSTrainingData) -> ReDoSModelMetrics:
        """Train the ML component and return its metrics.

        A ``RandomForestReDoSModel`` is created first when no ML model was
        supplied. The rule-based component is not trained here.
        """
        ml_component = self._ml_model
        if ml_component is None:
            ml_component = self._ml_model = RandomForestReDoSModel(self._config)

        trained_metrics = ml_component.train(data)
        self._metrics = trained_metrics
        return trained_metrics

    def get_feature_importance(self) -> List[float]:
        """Return importances from the trained ML model, else the rule model."""
        ml_component = self._ml_model
        if ml_component is not None and ml_component.is_trained:
            source = ml_component
        else:
            source = self._rule_model
        return source.get_feature_importance()

    def _save_model_data(self) -> Dict[str, Any]:
        """Return the ML component's saved state and registry name.

        Both entries are ``None`` when no ML model is attached.
        """
        ml_component = self._ml_model
        if ml_component is None:
            return {"ml_model_data": None, "ml_model_type": None}
        return {
            "ml_model_data": ml_component._save_model_data(),
            "ml_model_type": ml_component.name,
        }

    def _load_model_data(self, data: Dict[str, Any]) -> None:
        """Rebuild the ML component when both its type and state were saved."""
        model_type = data.get("ml_model_type")
        model_state = data.get("ml_model_data")
        if not (model_type and model_state):
            return
        self._ml_model = create_model(model_type, self._config)
        self._ml_model._load_model_data(model_state)
939
+
940
+
941
+ # =============================================================================
942
+ # Model Registry and Factory
943
+ # =============================================================================
944
+
945
+
946
# Maps each model type name to its class. Read by create_model() and
# list_available_models(), extended at runtime by register_model().
MODEL_REGISTRY: Dict[str, Type[BaseReDoSModel]] = {
    "rule_based": RuleBasedReDoSModel,
    "random_forest": RandomForestReDoSModel,
    "gradient_boosting": GradientBoostingReDoSModel,
    "logistic_regression": LogisticRegressionReDoSModel,
    "ensemble": EnsembleReDoSModel,
}
953
+
954
+
955
def create_model(
    model_type: str | ModelType,
    config: ModelConfig | None = None,
) -> BaseReDoSModel:
    """Instantiate the model registered under ``model_type``.

    Args:
        model_type: Registry key, or a ``ModelType`` member whose
            ``value`` is used as the key.
        config: Optional configuration passed to the model constructor.

    Returns:
        A new instance of the registered model class.

    Raises:
        ValueError: If no model is registered under the given type.
    """
    key = model_type.value if isinstance(model_type, ModelType) else model_type

    factory = MODEL_REGISTRY.get(key)
    if factory is not None:
        return factory(config)

    known = ", ".join(MODEL_REGISTRY)
    raise ValueError(f"Unknown model type: {key}. Available types: {known}")
982
+
983
+
984
def register_model(name: str, model_class: Type[BaseReDoSModel]) -> None:
    """Add ``model_class`` to the registry under ``name``.

    An existing entry with the same name is replaced.

    Args:
        name: Registry key for the model.
        model_class: Model class to store under that key.
    """
    MODEL_REGISTRY.update({name: model_class})
992
+
993
+
994
def list_available_models() -> List[str]:
    """Return the registered model type names in registry order.

    Returns:
        A new list of registry keys.
    """
    return [*MODEL_REGISTRY]