truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877) hide show
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,785 @@
1
+ """Base abstractions for ReDoS ML framework.
2
+
3
+ This module defines the core protocols, data classes, and base types used
4
+ throughout the ML framework. It follows the principle of dependency inversion
5
+ by defining abstractions that concrete implementations depend on.
6
+
7
+ Design Principles:
8
+ - Protocol-based design for loose coupling
9
+ - Immutable data classes for thread safety
10
+ - Clear separation of concerns
11
+ - Extensibility through composition
12
+ """
13
+
14
+ from __future__ import annotations
15
+
16
+ import json
17
+ from abc import ABC, abstractmethod
18
+ from dataclasses import dataclass, field
19
+ from datetime import datetime
20
+ from enum import Enum
21
+ from pathlib import Path
22
+ from typing import Any, Dict, List, Optional, Protocol, Sequence, Tuple, runtime_checkable
23
+
24
+ from truthound.validators.security.redos.core import ReDoSRisk
25
+
26
+
27
+ # =============================================================================
28
+ # Enums
29
+ # =============================================================================
30
+
31
+
32
class ModelType(str, Enum):
    """Enumeration of the model families usable for ReDoS prediction.

    Members also derive from ``str``, so each one compares equal to its
    plain string value (``ModelType.SVM == "svm"``).
    """

    RULE_BASED = "rule_based"
    RANDOM_FOREST = "random_forest"
    GRADIENT_BOOSTING = "gradient_boosting"
    LOGISTIC_REGRESSION = "logistic_regression"
    SVM = "svm"
    NEURAL_NETWORK = "neural_network"
    ENSEMBLE = "ensemble"

    def __str__(self) -> str:
        """Render the member as its bare value instead of ``ModelType.X``."""
        raw_value: str = self.value
        return raw_value
45
+
46
+
47
+ # =============================================================================
48
+ # Data Classes
49
+ # =============================================================================
50
+
51
+
52
@dataclass(frozen=True)
class PatternFeatures:
    """Immutable container for extracted pattern features.

    This class holds all features extracted from a regex pattern for ML
    prediction. Features are categorized into structural, quantifier,
    dangerous pattern indicators, and complexity metrics.

    The order returned by :meth:`feature_names` is the single source of
    truth for the layout of the numeric vector; :meth:`to_vector` and
    :meth:`to_dict` are both derived from it so the three can never drift
    apart.

    Attributes:
        length: Total length of the pattern string
        group_count: Total number of groups (capture + non-capture)
        capture_group_count: Number of capturing groups
        non_capture_group_count: Number of non-capturing groups
        max_nesting_depth: Maximum depth of nested parentheses
        alternation_count: Number of alternation operators (|)
        plus_count: Number of + quantifiers
        star_count: Number of * quantifiers
        question_count: Number of ? quantifiers (non-lookahead)
        bounded_quantifier_count: Number of {n,m} quantifiers
        unbounded_quantifier_count: Number of unbounded quantifiers (+, *, {n,})
        lazy_quantifier_count: Number of lazy quantifiers (+?, *?, etc.)
        possessive_quantifier_count: Number of possessive quantifiers
        quantifier_density: Ratio of quantifiers to pattern length
        nested_quantifier_count: Number of nested quantifier patterns
        adjacent_quantifier_count: Number of adjacent quantifiers
        quantified_alternation_count: Number of alternations with quantifiers
        quantified_backreference_count: Number of backreferences with quantifiers
        char_class_count: Number of character classes []
        negated_char_class_count: Number of negated character classes [^]
        dot_count: Number of dot metacharacters
        word_boundary_count: Number of word boundary assertions
        lookahead_count: Number of lookahead assertions
        lookbehind_count: Number of lookbehind assertions
        negative_lookaround_count: Number of negative lookaround assertions
        backreference_count: Number of backreferences
        max_backreference_index: Highest backreference index used
        start_anchor: Whether pattern starts with ^ or \\A
        end_anchor: Whether pattern ends with $ or \\Z
        anchored: Whether pattern is fully anchored (both ends)
        backtracking_potential: Estimated backtracking risk score (0-100)
        estimated_states: Estimated number of NFA states
    """

    # Structural features
    length: int = 0
    group_count: int = 0
    capture_group_count: int = 0
    non_capture_group_count: int = 0
    max_nesting_depth: int = 0
    alternation_count: int = 0

    # Quantifier features
    plus_count: int = 0
    star_count: int = 0
    question_count: int = 0
    bounded_quantifier_count: int = 0
    unbounded_quantifier_count: int = 0
    lazy_quantifier_count: int = 0
    possessive_quantifier_count: int = 0
    quantifier_density: float = 0.0

    # Dangerous pattern indicators
    nested_quantifier_count: int = 0
    adjacent_quantifier_count: int = 0
    quantified_alternation_count: int = 0
    quantified_backreference_count: int = 0

    # Character class features
    char_class_count: int = 0
    negated_char_class_count: int = 0
    dot_count: int = 0
    word_boundary_count: int = 0

    # Lookaround features
    lookahead_count: int = 0
    lookbehind_count: int = 0
    negative_lookaround_count: int = 0

    # Backreference features
    backreference_count: int = 0
    max_backreference_index: int = 0

    # Anchor features
    start_anchor: bool = False
    end_anchor: bool = False
    anchored: bool = False

    # Complexity metrics
    backtracking_potential: float = 0.0
    estimated_states: int = 0

    def to_vector(self) -> List[float]:
        """Convert features to a numeric vector for ML models.

        Returns:
            List of float values, one per entry of feature_names() and in
            the same order. Boolean features become 1.0 / 0.0.
        """
        # Derive the vector from feature_names() instead of repeating the
        # 32-entry ordering by hand, so names and values cannot get out of sync.
        return [float(getattr(self, name)) for name in self.feature_names()]

    @classmethod
    def feature_names(cls) -> List[str]:
        """Get names of all features in vector order.

        Returns:
            A new list of feature names matching to_vector() order.
        """
        return [
            "length",
            "group_count",
            "capture_group_count",
            "non_capture_group_count",
            "max_nesting_depth",
            "alternation_count",
            "plus_count",
            "star_count",
            "question_count",
            "bounded_quantifier_count",
            "unbounded_quantifier_count",
            "lazy_quantifier_count",
            "possessive_quantifier_count",
            "quantifier_density",
            "nested_quantifier_count",
            "adjacent_quantifier_count",
            "quantified_alternation_count",
            "quantified_backreference_count",
            "char_class_count",
            "negated_char_class_count",
            "dot_count",
            "word_boundary_count",
            "lookahead_count",
            "lookbehind_count",
            "negative_lookaround_count",
            "backreference_count",
            "max_backreference_index",
            "start_anchor",
            "end_anchor",
            "anchored",
            "backtracking_potential",
            "estimated_states",
        ]

    @classmethod
    def num_features(cls) -> int:
        """Get the number of features."""
        return len(cls.feature_names())

    def to_dict(self) -> Dict[str, Any]:
        """Convert to a dictionary mapping feature name to its float value.

        Values are taken from to_vector(), so integer and boolean features
        are reported as floats.
        """
        return dict(zip(self.feature_names(), self.to_vector()))
234
+
235
+
236
@dataclass
class ReDoSPrediction:
    """Outcome of scoring one regex pattern for ReDoS risk.

    Attributes:
        pattern: The analyzed regex pattern
        features: Extracted feature values
        risk_probability: Probability of ReDoS vulnerability (0.0 to 1.0)
        risk_level: Categorical risk level based on probability
        confidence: Model confidence in prediction (0.0 to 1.0)
        contributing_factors: Top features influencing the prediction,
            as (feature name, contribution) pairs
        model_type: Type of model used for prediction
        model_version: Version of the model used
        inference_time_ms: Time taken for inference in milliseconds
    """

    pattern: str
    features: PatternFeatures
    risk_probability: float
    risk_level: ReDoSRisk
    confidence: float
    contributing_factors: List[Tuple[str, float]] = field(default_factory=list)
    model_type: str = ""
    model_version: str = ""
    inference_time_ms: float = 0.0

    def to_dict(self) -> Dict[str, Any]:
        """Build a plain-dict view of the prediction for serialization.

        Probabilities, confidence and contributions are rounded to four
        decimals, the inference time to three; the risk level is emitted
        by its enum member name.
        """
        payload: Dict[str, Any] = {}
        payload["pattern"] = self.pattern
        payload["features"] = self.features.to_dict()
        payload["risk_probability"] = round(self.risk_probability, 4)
        payload["risk_level"] = self.risk_level.name
        payload["confidence"] = round(self.confidence, 4)

        factor_entries = []
        for feature_name, weight in self.contributing_factors:
            factor_entries.append(
                {"feature": feature_name, "contribution": round(weight, 4)}
            )
        payload["contributing_factors"] = factor_entries

        payload["model_type"] = self.model_type
        payload["model_version"] = self.model_version
        payload["inference_time_ms"] = round(self.inference_time_ms, 3)
        return payload

    def to_json(self) -> str:
        """Serialize to_dict() as a JSON string indented by two spaces."""
        serializable = self.to_dict()
        return json.dumps(serializable, indent=2)
282
+
283
+
284
@dataclass
class ReDoSTrainingData:
    """Container for training data.

    Attributes:
        patterns: List of regex patterns
        labels: Corresponding labels (0=safe, 1=vulnerable)
        features: Pre-extracted features (optional, can be computed)
        feature_names: Names of features in feature vectors; defaults to
            PatternFeatures.feature_names() when left empty
        sample_weights: Optional weights for samples
        metadata: Additional metadata about the dataset

    Raises:
        ValueError: If labels, features or sample_weights (when given) do
            not have exactly one entry per pattern.
    """

    patterns: List[str]
    labels: List[int]
    features: Optional[List[List[float]]] = None
    feature_names: List[str] = field(default_factory=list)
    sample_weights: Optional[List[float]] = None
    metadata: Dict[str, Any] = field(default_factory=dict)

    def __post_init__(self) -> None:
        """Validate that all per-sample sequences are the same length."""
        if len(self.patterns) != len(self.labels):
            raise ValueError(
                f"Number of patterns ({len(self.patterns)}) must match "
                f"number of labels ({len(self.labels)})"
            )
        if self.features is not None and len(self.features) != len(self.patterns):
            raise ValueError(
                f"Number of feature vectors ({len(self.features)}) must match "
                f"number of patterns ({len(self.patterns)})"
            )
        # Weights are per-sample too; reject a mismatch here rather than
        # letting it surface later inside a training routine.
        if (
            self.sample_weights is not None
            and len(self.sample_weights) != len(self.patterns)
        ):
            raise ValueError(
                f"Number of sample weights ({len(self.sample_weights)}) must match "
                f"number of patterns ({len(self.patterns)})"
            )
        if not self.feature_names:
            self.feature_names = PatternFeatures.feature_names()

    def __len__(self) -> int:
        """Return the number of samples (patterns)."""
        return len(self.patterns)

    @property
    def num_vulnerable(self) -> int:
        """Count of vulnerable patterns (sum of the 0/1 labels)."""
        return sum(self.labels)

    @property
    def num_safe(self) -> int:
        """Count of safe patterns."""
        return len(self.labels) - sum(self.labels)

    @property
    def class_balance(self) -> float:
        """Ratio of vulnerable to total samples (0.0 for an empty dataset)."""
        return self.num_vulnerable / len(self) if len(self) > 0 else 0.0
335
+
336
+
337
@dataclass
class ReDoSModelMetrics:
    """Model evaluation metrics.

    Attributes:
        accuracy: Overall classification accuracy
        precision: Precision for vulnerable class
        recall: Recall for vulnerable class (sensitivity)
        f1_score: F1 score (harmonic mean of precision and recall)
        specificity: True negative rate
        auc_roc: Area under ROC curve (if available)
        confusion_matrix: [[TN, FP], [FN, TP]]
        feature_importances: Feature importance scores (if available)
        cross_val_scores: Cross-validation scores (if available)
        training_samples: Number of training samples
        training_time_seconds: Time taken for training
        trained_at: Timestamp of training completion; defaults to
            datetime.now() at construction time
    """

    accuracy: float
    precision: float
    recall: float
    f1_score: float
    specificity: float = 0.0
    auc_roc: Optional[float] = None
    confusion_matrix: Optional[List[List[int]]] = None
    feature_importances: Optional[Dict[str, float]] = None
    cross_val_scores: Optional[List[float]] = None
    training_samples: int = 0
    training_time_seconds: float = 0.0
    trained_at: datetime = field(default_factory=datetime.now)

    def to_dict(self) -> Dict[str, Any]:
        """Convert to dictionary.

        Scores are rounded to four decimals and the training time to three.
        Optional metrics are included only when they are not None.
        """
        result: Dict[str, Any] = {
            "accuracy": round(self.accuracy, 4),
            "precision": round(self.precision, 4),
            "recall": round(self.recall, 4),
            "f1_score": round(self.f1_score, 4),
            "specificity": round(self.specificity, 4),
            "training_samples": self.training_samples,
            "training_time_seconds": round(self.training_time_seconds, 3),
            "trained_at": self.trained_at.isoformat(),
        }
        if self.auc_roc is not None:
            result["auc_roc"] = round(self.auc_roc, 4)
        if self.confusion_matrix is not None:
            result["confusion_matrix"] = self.confusion_matrix
        if self.feature_importances is not None:
            result["feature_importances"] = {
                k: round(v, 4) for k, v in self.feature_importances.items()
            }
        if self.cross_val_scores is not None:
            result["cross_val_scores"] = [round(s, 4) for s in self.cross_val_scores]
        return result

    def summary(self) -> str:
        """Get a human-readable, newline-separated summary of metrics.

        The AUC-ROC line is added only when auc_roc is set, and the CV mean
        line only when at least one cross-validation score is present.
        """
        lines = [
            f"Accuracy: {self.accuracy:.2%}",
            f"Precision: {self.precision:.2%}",
            f"Recall: {self.recall:.2%}",
            f"F1 Score: {self.f1_score:.2%}",
            f"Specificity: {self.specificity:.2%}",
        ]
        if self.auc_roc is not None:
            lines.append(f"AUC-ROC: {self.auc_roc:.4f}")
        # Truthiness check (not `is not None`): an empty score list would
        # otherwise divide by zero when computing the mean.
        if self.cross_val_scores:
            mean_cv = sum(self.cross_val_scores) / len(self.cross_val_scores)
            lines.append(f"CV Mean: {mean_cv:.2%}")
        return "\n".join(lines)
408
+
409
+
410
@dataclass
class ModelConfig:
    """Settings bundle for ReDoS ML models.

    Groups algorithm hyperparameters together with general training
    settings so they can be passed around, serialized, and restored as one
    object.

    Attributes:
        model_type: Which kind of model to use
        n_estimators: Estimator count for ensemble methods
        max_depth: Upper bound on tree depth
        min_samples_split: Fewest samples needed to split an internal node
        min_samples_leaf: Fewest samples allowed at a leaf node
        learning_rate: Step size for gradient-based methods
        random_state: Seed used for reproducibility
        n_jobs: Parallel job count (-1 for all cores)
        class_weight: Strategy for handling class imbalance
        feature_selection: Whether feature selection is performed
        max_features: Upper bound on the number of features used
        cross_validation_folds: How many CV folds to run
        validation_split: Share of the data held out for validation
        early_stopping: Whether early stopping is used
        model_version: Version string attached to the model
    """

    model_type: ModelType = ModelType.RANDOM_FOREST
    n_estimators: int = 100
    max_depth: int = 10
    min_samples_split: int = 5
    min_samples_leaf: int = 2
    learning_rate: float = 0.1
    random_state: int = 42
    n_jobs: int = -1
    class_weight: str = "balanced"
    feature_selection: bool = True
    max_features: int = 50
    cross_validation_folds: int = 5
    validation_split: float = 0.2
    early_stopping: bool = True
    model_version: str = "1.0.0"

    def to_dict(self) -> Dict[str, Any]:
        """Serialize the configuration to a plain dictionary.

        ``model_type`` is emitted as the enum member's ``value``; every
        other field is copied unchanged, in declaration order.
        """
        copied_as_is = (
            "n_estimators",
            "max_depth",
            "min_samples_split",
            "min_samples_leaf",
            "learning_rate",
            "random_state",
            "n_jobs",
            "class_weight",
            "feature_selection",
            "max_features",
            "cross_validation_folds",
            "validation_split",
            "early_stopping",
            "model_version",
        )
        payload: Dict[str, Any] = {"model_type": self.model_type.value}
        for field_name in copied_as_is:
            payload[field_name] = getattr(self, field_name)
        return payload

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "ModelConfig":
        """Build a configuration from a dictionary of field values.

        A string ``model_type`` is converted to the matching ``ModelType``
        member on a copy of ``data``, so the caller's dictionary is never
        modified.
        """
        raw_type = data.get("model_type")
        if isinstance(raw_type, str):
            data = {**data, "model_type": ModelType(raw_type)}
        return cls(**data)

    @classmethod
    def default(cls) -> "ModelConfig":
        """Return a configuration with every field at its default."""
        return cls()

    @classmethod
    def fast_training(cls) -> "ModelConfig":
        """Return a preset aimed at fast training.

        Uses fewer estimators, shallower trees, and fewer CV folds than the
        defaults.
        """
        overrides = {
            "n_estimators": 50,
            "max_depth": 5,
            "cross_validation_folds": 3,
        }
        return cls(**overrides)

    @classmethod
    def high_accuracy(cls) -> "ModelConfig":
        """Return a preset aimed at high accuracy.

        Switches to gradient boosting with more estimators, deeper trees,
        and more CV folds than the defaults.
        """
        overrides = {
            "model_type": ModelType.GRADIENT_BOOSTING,
            "n_estimators": 200,
            "max_depth": 15,
            "cross_validation_folds": 10,
        }
        return cls(**overrides)
503
+
504
+
505
+ # =============================================================================
506
+ # Protocols
507
+ # =============================================================================
508
+
509
+
510
@runtime_checkable
class FeatureExtractorProtocol(Protocol):
    """Structural interface for feature extractors.

    A feature extractor turns raw regex patterns into ``PatternFeatures``
    objects, i.e. numeric feature vectors suitable for ML models. The
    protocol is runtime-checkable, so ``isinstance`` can be used to test
    whether an object provides the required methods.
    """

    def extract(self, pattern: str) -> PatternFeatures:
        """Extract features from one regex pattern.

        Args:
            pattern: The regex pattern, as a string

        Returns:
            A PatternFeatures instance holding all extracted features
        """
        ...

    def extract_batch(self, patterns: Sequence[str]) -> List[PatternFeatures]:
        """Extract features from several regex patterns.

        Args:
            patterns: The regex pattern strings to process

        Returns:
            A list of PatternFeatures instances
        """
        ...
539
+
540
+
541
@runtime_checkable
class ReDoSModelProtocol(Protocol):
    """Structural interface for ReDoS prediction models.

    Every ReDoS ML model is expected to provide these members, so that
    models can be used polymorphically and swapped for one another. The
    protocol is runtime-checkable.
    """

    @property
    def is_trained(self) -> bool:
        """Whether the model has been trained."""
        ...

    @property
    def config(self) -> ModelConfig:
        """The configuration the model is using."""
        ...

    def predict(self, features: List[float]) -> Tuple[float, float]:
        """Predict risk probability and confidence for one sample.

        Args:
            features: The sample's feature vector

        Returns:
            A (risk_probability, confidence) tuple
        """
        ...

    def predict_batch(self, features: List[List[float]]) -> List[Tuple[float, float]]:
        """Predict risk probability and confidence for many samples.

        Args:
            features: A list of feature vectors

        Returns:
            A list of (risk_probability, confidence) tuples
        """
        ...

    def train(self, data: ReDoSTrainingData) -> ReDoSModelMetrics:
        """Fit the model on labeled data.

        Args:
            data: Container with the training data

        Returns:
            The metrics produced by training
        """
        ...

    def get_feature_importance(self) -> List[float]:
        """Report how important each feature is.

        Returns:
            A list of importance scores for each feature
        """
        ...

    def save(self, path: str | Path) -> None:
        """Write the model to disk.

        Args:
            path: Destination path for the saved model
        """
        ...

    def load(self, path: str | Path) -> None:
        """Read the model from disk.

        Args:
            path: Location of the saved model
        """
        ...
617
+
618
+
619
+ # =============================================================================
620
+ # Base Classes
621
+ # =============================================================================
622
+
623
+
624
class BaseReDoSModel(ABC):
    """Abstract base class for ReDoS ML models.

    This class provides common functionality shared by all model
    implementations, including configuration management, feature name
    tracking, and serialization utilities.

    Subclasses must implement:
        - predict(): Single sample prediction
        - predict_batch(): Batch prediction
        - train(): Model training
        - get_feature_importance(): Per-feature importance scores
        - _save_model_data(): Model-specific save logic
        - _load_model_data(): Model-specific load logic
    """

    # Identifier and version written into every saved model file.
    name: str = "base"
    version: str = "1.0.0"

    def __init__(self, config: ModelConfig | None = None):
        """Initialize the model in an untrained state.

        Args:
            config: Model configuration (uses default if None)
        """
        self._config = config or ModelConfig.default()
        self._trained = False
        self._metrics: Optional[ReDoSModelMetrics] = None
        self._feature_names: List[str] = PatternFeatures.feature_names()

    @property
    def is_trained(self) -> bool:
        """Check if the model has been trained."""
        return self._trained

    @property
    def config(self) -> ModelConfig:
        """Get the model configuration."""
        return self._config

    @property
    def metrics(self) -> Optional[ReDoSModelMetrics]:
        """Get training metrics if available."""
        return self._metrics

    @property
    def feature_names(self) -> List[str]:
        """Get feature names."""
        return self._feature_names

    @abstractmethod
    def predict(self, features: List[float]) -> Tuple[float, float]:
        """Predict risk probability and confidence.

        Args:
            features: Feature vector

        Returns:
            Tuple of (risk_probability, confidence)
        """
        pass

    @abstractmethod
    def predict_batch(
        self, features: List[List[float]]
    ) -> List[Tuple[float, float]]:
        """Predict for multiple feature vectors.

        Args:
            features: List of feature vectors

        Returns:
            List of (risk_probability, confidence) tuples
        """
        pass

    @abstractmethod
    def train(self, data: ReDoSTrainingData) -> ReDoSModelMetrics:
        """Train the model on labeled data.

        Args:
            data: Training data container

        Returns:
            Training metrics
        """
        pass

    @abstractmethod
    def get_feature_importance(self) -> List[float]:
        """Get feature importance scores.

        Returns:
            List of importance scores for each feature
        """
        pass

    @abstractmethod
    def _save_model_data(self) -> Dict[str, Any]:
        """Get model-specific data for saving.

        Returns:
            Dictionary of data to serialize
        """
        pass

    @abstractmethod
    def _load_model_data(self, data: Dict[str, Any]) -> None:
        """Load model-specific data.

        Args:
            data: Dictionary of serialized data
        """
        pass

    def save(self, path: str | Path) -> None:
        """Save model to disk.

        Pickles a dictionary holding the model name, version, configuration,
        trained flag, metrics (if any), feature names, and the
        subclass-specific payload from ``_save_model_data()``. Missing parent
        directories are created.

        Args:
            path: Path to save the model
        """
        import pickle

        path = Path(path)
        data = {
            "name": self.name,
            "version": self.version,
            "config": self._config.to_dict(),
            "trained": self._trained,
            "metrics": self._metrics.to_dict() if self._metrics else None,
            "feature_names": self._feature_names,
            "model_data": self._save_model_data(),
        }

        path.parent.mkdir(parents=True, exist_ok=True)
        with open(path, "wb") as f:
            pickle.dump(data, f)

    def load(self, path: str | Path) -> None:
        """Load model from disk.

        Restores configuration, trained flag, feature names, and metrics from
        a file written by ``save()``, then hands the model-specific payload
        to ``_load_model_data()``.

        Args:
            path: Path to the saved model
        """
        import pickle
        from datetime import datetime

        path = Path(path)
        # SECURITY: unpickling can execute arbitrary code embedded in the
        # file. Only load model files that come from a trusted source.
        with open(path, "rb") as f:
            data = pickle.load(f)

        self._config = ModelConfig.from_dict(data["config"])
        self._trained = data["trained"]
        self._feature_names = data.get("feature_names", PatternFeatures.feature_names())

        metrics_dict = data.get("metrics")
        if metrics_dict:
            # Reconstruct metrics from dict
            metrics_kwargs: Dict[str, Any] = {
                "accuracy": metrics_dict["accuracy"],
                "precision": metrics_dict["precision"],
                "recall": metrics_dict["recall"],
                "f1_score": metrics_dict["f1_score"],
                "specificity": metrics_dict.get("specificity", 0.0),
                "auc_roc": metrics_dict.get("auc_roc"),
                "confusion_matrix": metrics_dict.get("confusion_matrix"),
                "feature_importances": metrics_dict.get("feature_importances"),
                "cross_val_scores": metrics_dict.get("cross_val_scores"),
                "training_samples": metrics_dict.get("training_samples", 0),
                "training_time_seconds": metrics_dict.get("training_time_seconds", 0.0),
            }
            # The metrics dict stores the training timestamp as an ISO
            # string; restore it so a loaded model keeps its original
            # training time. Older files without the key are still accepted.
            # NOTE(review): assumes ReDoSModelMetrics accepts `trained_at`
            # as a constructor keyword - confirm against its definition.
            trained_at = metrics_dict.get("trained_at")
            if trained_at is not None:
                metrics_kwargs["trained_at"] = datetime.fromisoformat(trained_at)
            self._metrics = ReDoSModelMetrics(**metrics_kwargs)
        else:
            # Drop metrics left over from an earlier train()/load() so the
            # instance reflects the file that was just loaded.
            self._metrics = None

        self._load_model_data(data.get("model_data", {}))

    def get_feature_importance_dict(self) -> Dict[str, float]:
        """Get feature importance as a dictionary.

        Names and scores are paired positionally; if the two sequences differ
        in length, the extra entries of the longer one are dropped.

        Returns:
            Dictionary mapping feature names to importance scores
        """
        importance = self.get_feature_importance()
        return dict(zip(self._feature_names, importance))