truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877) hide show
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,886 @@
1
+ """Advanced ReDoS (Regular Expression Denial of Service) Protection.
2
+
3
+ This module provides comprehensive protection against ReDoS attacks:
4
+ - Static analysis of regex patterns for dangerous constructs
5
+ - Complexity estimation for potential exponential backtracking
6
+ - Safe regex compilation with configurable limits
7
+ - Runtime execution monitoring
8
+
9
+ ReDoS attacks exploit the exponential time complexity of certain regex
10
+ patterns, causing validation to hang or consume excessive CPU.
11
+
12
+ Architecture:
13
+ ┌────────────────────────────────────────────────────────────────┐
14
+ │ ReDoS Protection Pipeline │
15
+ └────────────────────────────────────────────────────────────────┘
16
+
17
+ ┌───────────────┬───────────────┼───────────────┬────────────────┐
18
+ │ │ │ │ │
19
+ ▼ ▼ ▼ ▼ ▼
20
+ ┌─────────┐ ┌─────────┐ ┌──────────┐ ┌──────────┐ ┌────────┐
21
+ │ Static │ │Complexity│ │ Quantifier│ │Alternation│ │ Safe │
22
+ │ Analysis│ │ Estimator│ │ Analysis │ │ Analysis │ │ Compile│
23
+ └─────────┘ └─────────┘ └──────────┘ └──────────┘ └────────┘
24
+
25
+ Usage:
26
+ from truthound.validators.security.redos import (
27
+ check_regex_safety,
28
+ analyze_regex_complexity,
29
+ create_safe_regex,
30
+ )
31
+
32
+ # Quick safety check
33
+ is_safe, warning = check_regex_safety(r"(a+)+")
34
+ # is_safe = False, warning = "Nested quantifiers detected"
35
+
36
+ # Detailed analysis
37
+ result = analyze_regex_complexity(r"^[a-z]+@[a-z]+\\.com$")
38
+ print(result.risk_level) # ReDoSRisk.LOW
39
+ print(result.complexity_score) # 2.5
40
+
41
+ # Create safe regex with limits
42
+ pattern = create_safe_regex(r"^\\w+$", max_length=1000)
43
+ """
44
+
45
+ from __future__ import annotations
46
+
47
+ import re
48
+ import time
49
+ import threading
50
+ from dataclasses import dataclass, field
51
+ from enum import Enum, auto
52
+ from typing import Any, Callable
53
+
54
+
55
class ReDoSRisk(Enum):
    """Severity scale for a pattern's exposure to ReDoS.

    Members are declared from least to most severe and their integer
    values increase in the same order, so ``.value`` can be used to
    compare two risk levels.
    """

    # No known vulnerability patterns
    NONE = 1
    # Minor concerns, likely safe
    LOW = 2
    # Some concerning patterns, use with caution
    MEDIUM = 3
    # Dangerous patterns detected, avoid
    HIGH = 4
    # Known ReDoS pattern, reject
    CRITICAL = 5
63
+
64
+
65
@dataclass(frozen=True)
class SafeRegexConfig:
    """Immutable bundle of limits applied to regex patterns and matching.

    Attributes:
        max_pattern_length: Maximum pattern length (chars)
        max_groups: Maximum capture groups allowed
        max_quantifier_range: Maximum {n,m} range (m-n)
        max_alternations: Maximum alternation branches
        max_nested_depth: Maximum nesting depth
        allow_backreferences: Whether to allow backreferences
        allow_lookaround: Whether to allow lookahead/lookbehind
        timeout_seconds: Max execution time for matching
        max_input_length: Maximum input string length to match
    """

    max_pattern_length: int = 1000
    max_groups: int = 20
    max_quantifier_range: int = 100
    max_alternations: int = 50
    max_nested_depth: int = 10
    allow_backreferences: bool = False
    allow_lookaround: bool = True
    timeout_seconds: float = 1.0
    max_input_length: int = 100_000

    @classmethod
    def strict(cls) -> "SafeRegexConfig":
        """Build the tight preset intended for untrusted patterns."""
        limits = {
            "max_pattern_length": 500,
            "max_groups": 10,
            "max_quantifier_range": 50,
            "max_alternations": 20,
            "max_nested_depth": 5,
            "allow_backreferences": False,
            "allow_lookaround": False,
            "timeout_seconds": 0.5,
            "max_input_length": 10_000,
        }
        return cls(**limits)

    @classmethod
    def lenient(cls) -> "SafeRegexConfig":
        """Build the relaxed preset intended for trusted patterns."""
        limits = {
            "max_pattern_length": 5000,
            "max_groups": 50,
            "max_quantifier_range": 1000,
            "max_alternations": 100,
            "max_nested_depth": 20,
            "allow_backreferences": True,
            "allow_lookaround": True,
            "timeout_seconds": 5.0,
            "max_input_length": 1_000_000,
        }
        return cls(**limits)
120
+
121
+
122
@dataclass
class RegexAnalysisResult:
    """Outcome of analysing a single regex pattern.

    Attributes:
        pattern: The analyzed pattern
        risk_level: Overall ReDoS risk level
        complexity_score: Numeric complexity estimate (0-100)
        warnings: List of warning messages
        dangerous_constructs: List of detected dangerous constructs
        metrics: Detailed pattern metrics
        is_safe: Whether the pattern is considered safe
        recommendation: Suggested action or alternative
    """

    pattern: str
    risk_level: ReDoSRisk
    complexity_score: float
    warnings: list[str] = field(default_factory=list)
    dangerous_constructs: list[str] = field(default_factory=list)
    metrics: dict[str, Any] = field(default_factory=dict)
    is_safe: bool = True
    recommendation: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return the result as a plain dictionary.

        The risk level is emitted as its member name and the complexity
        score is rounded to two decimal places; every other field is
        passed through unchanged.
        """
        serialized: dict[str, Any] = {}
        serialized["pattern"] = self.pattern
        serialized["risk_level"] = self.risk_level.name
        serialized["complexity_score"] = round(self.complexity_score, 2)
        serialized["warnings"] = self.warnings
        serialized["dangerous_constructs"] = self.dangerous_constructs
        serialized["metrics"] = self.metrics
        serialized["is_safe"] = self.is_safe
        serialized["recommendation"] = self.recommendation
        return serialized
158
+
159
+
160
class RegexComplexityAnalyzer:
    """Analyzes regex patterns for complexity and ReDoS vulnerability.

    This analyzer performs static analysis on regex patterns to detect
    potentially dangerous constructs that could lead to exponential
    backtracking (ReDoS attacks). The analysis is purely textual: the
    pattern under test is never compiled or executed.

    Detection Categories:
        1. Nested Quantifiers: (a+)+ - exponential backtracking
        2. Overlapping Alternation: (a|a)+ - ambiguous matching
        3. Polynomial Backtracking: a*b*c*d* on non-matching input
        4. Atomic Group Absence: Patterns that would benefit from atomic groups
        5. Catastrophic Backreference: (a+)\\1+ with long inputs

    Example:
        analyzer = RegexComplexityAnalyzer()
        result = analyzer.analyze(r"(a+)+b")
        print(result.risk_level)  # ReDoSRisk.CRITICAL
        print(result.dangerous_constructs)  # ["nested_quantifiers"]
    """

    # Dangerous pattern signatures as (detector regex, construct name, risk).
    # Each detector is searched against the *text* of the pattern being
    # analysed, so regex metacharacters it looks for must be escaped.
    DANGEROUS_PATTERNS: list[tuple[str, str, ReDoSRisk]] = [
        # Nested quantifiers - exponential
        (r"\([^)]*[+*][^)]*\)[+*]", "nested_quantifiers", ReDoSRisk.CRITICAL),
        (r"\([^)]*[+*][^)]*\)\{[0-9]+,\}", "nested_quantifiers_bounded", ReDoSRisk.CRITICAL),

        # Nested groups with quantifiers
        (r"\(\([^)]*\)[+*]\)[+*]", "deeply_nested_quantifiers", ReDoSRisk.CRITICAL),

        # Overlapping character classes in alternation
        (r"\([^)]*\|[^)]*\)[+*]", "alternation_with_quantifier", ReDoSRisk.HIGH),

        # Backreference with quantifier
        (r"\\[0-9]+[+*]", "quantified_backreference", ReDoSRisk.HIGH),
        (r"\\[0-9]+\{[0-9]+,\}", "bounded_quantified_backreference", ReDoSRisk.HIGH),

        # Multiple adjacent quantifiers (greedy conflict)
        (r"[+*][+*]", "adjacent_quantifiers", ReDoSRisk.MEDIUM),

        # Long alternation chains
        (r"(?:\|[^|)]+){10,}", "long_alternation_chain", ReDoSRisk.MEDIUM),

        # Greedy quantifier followed by same pattern.
        # The "+" must be escaped: the detector has to find the literal text
        # ".+." in the analysed pattern, not "two or more consecutive dots".
        (r"\.\+\.", "greedy_dot_conflict", ReDoSRisk.MEDIUM),
        (r"\.\*\.", "greedy_dotstar_conflict", ReDoSRisk.MEDIUM),

        # Unbounded repetition at start
        (r"^[+*]", "start_with_quantifier", ReDoSRisk.LOW),

        # Atomic group syntax "(?>...)" (only supported by `re` from
        # Python 3.11 onwards; older versions reject it at compile time)
        (r"\(\?\>", "atomic_group_attempt", ReDoSRisk.LOW),
    ]

    # Quantifier patterns for extraction
    QUANTIFIER_PATTERN = re.compile(
        r"""
        (?:
            \+\?? |           # + or +?
            \*\?? |           # * or *?
            \?\?? |           # ? or ??
            \{(\d+)\} |       # {n}
            \{(\d+),\} |      # {n,}
            \{(\d+),(\d+)\}   # {n,m}
        )
        """,
        re.VERBOSE,
    )

    def __init__(self, config: SafeRegexConfig | None = None):
        """Initialize the analyzer.

        Args:
            config: Safety configuration; a default ``SafeRegexConfig`` is
                used when omitted.
        """
        self.config = config or SafeRegexConfig()
        self._compile_dangerous_patterns()

    def _compile_dangerous_patterns(self) -> None:
        """Pre-compile dangerous pattern detectors.

        Signatures that fail to compile are skipped so that a single bad
        entry does not disable the whole analyzer.
        """
        self._compiled_patterns: list[tuple[re.Pattern, str, ReDoSRisk]] = []
        for pattern_str, name, risk in self.DANGEROUS_PATTERNS:
            try:
                compiled = re.compile(pattern_str)
            except re.error:
                # Best effort: skip invalid signatures
                continue
            self._compiled_patterns.append((compiled, name, risk))

    @staticmethod
    def _escalate(current: ReDoSRisk, candidate: ReDoSRisk) -> ReDoSRisk:
        """Return whichever of the two risk levels is more severe."""
        return candidate if candidate.value > current.value else current

    def analyze(self, pattern: str) -> RegexAnalysisResult:
        """Analyze a regex pattern for ReDoS vulnerability.

        Args:
            pattern: Regex pattern to analyze

        Returns:
            RegexAnalysisResult with risk assessment. An empty pattern is
            reported as safe with risk ``NONE`` and a score of 0. The
            complexity score is capped at 100, and the pattern is considered
            safe only when the overall risk is ``LOW`` or below.
        """
        # Basic validation
        if not pattern:
            return RegexAnalysisResult(
                pattern=pattern,
                risk_level=ReDoSRisk.NONE,
                complexity_score=0.0,
                is_safe=True,
            )

        config = self.config
        warnings: list[str] = []
        dangerous_constructs: list[str] = []
        max_risk = ReDoSRisk.NONE
        complexity_score = 0.0

        # Check pattern length
        if len(pattern) > config.max_pattern_length:
            warnings.append(
                f"Pattern length ({len(pattern)}) exceeds limit "
                f"({config.max_pattern_length})"
            )
            max_risk = self._escalate(max_risk, ReDoSRisk.MEDIUM)
            complexity_score += 10

        # Extract metrics
        metrics = self._extract_metrics(pattern)

        # Check group count (adds to the score but does not raise the risk)
        if metrics["group_count"] > config.max_groups:
            warnings.append(
                f"Too many groups ({metrics['group_count']} > {config.max_groups})"
            )
            complexity_score += 5

        # Check nesting depth
        if metrics["max_nesting"] > config.max_nested_depth:
            warnings.append(
                f"Nesting too deep ({metrics['max_nesting']} > {config.max_nested_depth})"
            )
            complexity_score += 15
            max_risk = self._escalate(max_risk, ReDoSRisk.MEDIUM)

        # Check for backreferences
        if metrics["has_backreference"] and not config.allow_backreferences:
            warnings.append("Backreferences not allowed")
            dangerous_constructs.append("backreference")
            complexity_score += 20
            max_risk = self._escalate(max_risk, ReDoSRisk.HIGH)

        # Check for lookaround (adds to the score but does not raise the risk)
        if metrics["has_lookaround"] and not config.allow_lookaround:
            warnings.append("Lookaround assertions not allowed")
            complexity_score += 5

        # Check quantifier ranges; open-ended {n,} ranges have no upper
        # bound recorded (None) and are not measured here.
        for qmin, qmax in metrics.get("quantifier_ranges", []):
            if qmax is not None and qmax - qmin > config.max_quantifier_range:
                warnings.append(
                    f"Quantifier range too large: {{{qmin},{qmax}}}"
                )
                complexity_score += 10

        # Check for dangerous patterns
        for compiled, name, risk in self._compiled_patterns:
            if compiled.search(pattern):
                dangerous_constructs.append(name)
                max_risk = self._escalate(max_risk, risk)
                complexity_score += self._risk_to_score(risk)

        # Additional heuristic checks
        complexity_score += self._analyze_quantifier_density(pattern)
        complexity_score += self._analyze_alternation_complexity(pattern)

        # Determine if safe
        is_safe = max_risk.value <= ReDoSRisk.LOW.value

        # Generate recommendation
        recommendation = self._generate_recommendation(
            max_risk, dangerous_constructs, warnings
        )

        return RegexAnalysisResult(
            pattern=pattern,
            risk_level=max_risk,
            complexity_score=min(complexity_score, 100),
            warnings=warnings,
            dangerous_constructs=dangerous_constructs,
            metrics=metrics,
            is_safe=is_safe,
            recommendation=recommendation,
        )

    def _extract_metrics(self, pattern: str) -> dict[str, Any]:
        """Extract metrics from pattern.

        Parentheses and ``|`` are only counted when they act as regex
        syntax: escaped characters (``\\(``) and characters inside a
        character class (``[(|]``) are literals and are ignored. Named
        groups ``(?P<name>...)`` count as capture groups; every other
        ``(?...)`` construct does not.

        Args:
            pattern: Regex pattern

        Returns:
            Dictionary of metrics
        """
        metrics: dict[str, Any] = {
            "length": len(pattern),
            "group_count": 0,
            "max_nesting": 0,
            "quantifier_count": 0,
            "alternation_count": 0,
            "has_backreference": bool(re.search(r"\\[1-9]", pattern)),
            "has_lookaround": bool(re.search(r"\(\?[=!<]", pattern)),
            "has_atomic": bool(re.search(r"\(\?>", pattern)),
            "quantifier_ranges": [],
        }

        # Single left-to-right scan over the structural characters
        depth = 0
        max_depth = 0
        capture_groups = 0
        alternations = 0
        in_class = False
        class_body_start = 0
        index = 0
        length = len(pattern)
        while index < length:
            char = pattern[index]
            if char == "\\":
                # Skip the escape together with the character it escapes
                index += 2
                continue
            if in_class:
                # "]" closes the class unless it is its first member,
                # as in "[]a]" or "[^]a]"
                if char == "]" and index > class_body_start:
                    in_class = False
            elif char == "[":
                in_class = True
                class_body_start = index + 1
                if pattern[index + 1:index + 2] == "^":
                    class_body_start += 1
            elif char == "(":
                depth += 1
                max_depth = max(max_depth, depth)
                opener = pattern[index + 1:index + 4]
                if not opener.startswith("?") or opener == "?P<":
                    capture_groups += 1
            elif char == ")":
                depth = max(0, depth - 1)
            elif char == "|":
                alternations += 1
            index += 1

        metrics["group_count"] = capture_groups
        metrics["max_nesting"] = max_depth
        metrics["alternation_count"] = alternations

        # Extract quantifier information
        for match in self.QUANTIFIER_PATTERN.finditer(pattern):
            metrics["quantifier_count"] += 1
            exact, open_min, range_min, range_max = match.groups()
            if exact is not None:  # {n}
                n = int(exact)
                metrics["quantifier_ranges"].append((n, n))
            elif open_min is not None:  # {n,}
                metrics["quantifier_ranges"].append((int(open_min), None))
            elif range_min is not None and range_max is not None:  # {n,m}
                metrics["quantifier_ranges"].append(
                    (int(range_min), int(range_max))
                )

        return metrics

    def _analyze_quantifier_density(self, pattern: str) -> float:
        """Analyze quantifier density for complexity.

        High density of quantifiers increases backtracking potential.

        Returns:
            Share of characters that are one of ``+*?{}``, scaled to 0-20.
        """
        if not pattern:
            return 0.0
        quantifier_chars = sum(1 for c in pattern if c in "+*?{}")
        density = quantifier_chars / len(pattern)
        return density * 20  # Scale to 0-20

    def _analyze_alternation_complexity(self, pattern: str) -> float:
        """Analyze alternation complexity.

        Overlapping alternatives can cause exponential matching.

        Returns:
            15 points for every quantified group that contains ``|``.
        """
        if "|" not in pattern:
            return 0

        # Count alternations in quantified groups
        quantified_alt_pattern = r"\([^)]*\|[^)]*\)[+*?]"
        matches = re.findall(quantified_alt_pattern, pattern)

        return len(matches) * 15  # Each quantified alternation adds risk

    def _risk_to_score(self, risk: ReDoSRisk) -> float:
        """Convert risk level to complexity score contribution."""
        scores = {
            ReDoSRisk.NONE: 0,
            ReDoSRisk.LOW: 5,
            ReDoSRisk.MEDIUM: 15,
            ReDoSRisk.HIGH: 30,
            ReDoSRisk.CRITICAL: 50,
        }
        return scores.get(risk, 0)

    def _generate_recommendation(
        self,
        risk: ReDoSRisk,
        constructs: list[str],
        warnings: list[str],
    ) -> str:
        """Generate recommendation based on analysis.

        Args:
            risk: Overall risk level
            constructs: Dangerous constructs found
            warnings: Warning messages (currently not used for the text)

        Returns:
            Recommendation string
        """
        if risk == ReDoSRisk.NONE:
            return "Pattern appears safe."

        if risk == ReDoSRisk.LOW:
            return "Pattern has minor concerns but is likely safe for typical inputs."

        if risk == ReDoSRisk.MEDIUM:
            return (
                "Pattern has moderate risk. Consider simplifying or adding input "
                "length limits."
            )

        if risk == ReDoSRisk.HIGH:
            recommendations = ["Pattern has high ReDoS risk. Consider:"]
            if "nested_quantifiers" in constructs:
                recommendations.append("- Avoid nested quantifiers like (a+)+")
            if "alternation_with_quantifier" in constructs:
                recommendations.append("- Avoid quantified alternation like (a|b)+")
            if "quantified_backreference" in constructs:
                recommendations.append("- Avoid quantified backreferences like (a+)\\1+")
            recommendations.append("- Use possessive quantifiers if available")
            recommendations.append("- Limit input length strictly")
            return "\n".join(recommendations)

        # CRITICAL
        return (
            "CRITICAL: Pattern contains known ReDoS vulnerability. "
            "Do NOT use with untrusted input. Rewrite the pattern to avoid "
            "nested quantifiers and overlapping alternatives."
        )
481
+
482
+
483
class RegexSafetyChecker:
    """High-level API for checking regex pattern safety.

    A pattern is accepted only if it passes three gates, in this order:
    a maximum-length limit taken from the configuration, a syntax check
    (the pattern must compile), and the static ReDoS analysis performed
    by ``RegexComplexityAnalyzer``.

    Example:
        checker = RegexSafetyChecker()

        is_safe, warning = checker.check(r"^[a-z]+$")
        # warning is None whenever is_safe is True

        is_safe, warning = checker.check(r"(a+)+b")
        # when the analyzer rejects a pattern, warning starts with
        # "ReDoS risk (<LEVEL>)"

        compiled = checker.validate_and_compile(r"^\\d+$")
    """

    def __init__(self, config: SafeRegexConfig | None = None):
        """Initialize the checker.

        Args:
            config: Safety configuration. A default ``SafeRegexConfig`` is
                created when omitted.
        """
        self.config = config or SafeRegexConfig()
        self.analyzer = RegexComplexityAnalyzer(self.config)

    def check(self, pattern: str, flags: int = 0) -> tuple[bool, str | None]:
        """Check if a regex pattern is safe to use.

        Args:
            pattern: Regex pattern to check.
            flags: Regex flags the pattern will be compiled with. They are
                used for the syntax check only, because a pattern can be
                valid under one set of flags (e.g. ``re.VERBOSE``) and
                invalid under another. The ReDoS analysis always works on
                the raw pattern text.

        Returns:
            Tuple of (is_safe, warning_message). ``warning_message`` is
            ``None`` when the pattern is considered safe.
        """
        # Length check
        limit = self.config.max_pattern_length
        if len(pattern) > limit:
            return False, f"Pattern too long ({len(pattern)} > {limit})"

        # Syntax validation. Besides re.error, compilation can fail with
        # OverflowError (repetition count too large), RecursionError
        # (deeply nested pattern) or ValueError (incompatible flags).
        # All of them mean "unusable pattern", so they are reported
        # rather than allowed to crash the checker.
        try:
            re.compile(pattern, flags)
        except (re.error, ValueError, OverflowError, RecursionError) as e:
            return False, f"Invalid regex syntax: {e}"

        # Analyze for ReDoS
        result = self.analyzer.analyze(pattern)

        if not result.is_safe:
            warnings = "; ".join(result.warnings) if result.warnings else ""
            constructs = ", ".join(result.dangerous_constructs)
            message = f"ReDoS risk ({result.risk_level.name})"
            if constructs:
                message += f": {constructs}"
            if warnings:
                message += f". {warnings}"
            return False, message

        return True, None

    def check_pattern(self, pattern: str, flags: int = 0) -> tuple[bool, str | None]:
        """Alias for check() for backward compatibility."""
        return self.check(pattern, flags)

    def analyze(self, pattern: str) -> RegexAnalysisResult:
        """Get detailed analysis of a pattern.

        Args:
            pattern: Regex pattern to analyze.

        Returns:
            The ``RegexAnalysisResult`` produced by the analyzer.
        """
        return self.analyzer.analyze(pattern)

    def validate_and_compile(
        self,
        pattern: str,
        flags: int = 0,
    ) -> re.Pattern:
        """Validate pattern and compile if safe.

        Args:
            pattern: Regex pattern.
            flags: Regex flags.

        Returns:
            Compiled pattern.

        Raises:
            ValueError: If pattern is unsafe or invalid.
        """
        # Validate with the same flags used for the final compile, so a
        # syntax error surfaces as the documented ValueError.
        is_safe, warning = self.check(pattern, flags)
        if not is_safe:
            raise ValueError(f"Unsafe regex pattern: {warning}")

        return re.compile(pattern, flags)
585
+
586
+
587
class SafeRegexExecutor:
    """Run regex operations under an input-size cap and a time limit.

    Each operation runs in a daemon worker thread while the caller waits
    up to ``timeout_seconds`` for it to finish. On timeout the caller gets
    a ``TimeoutError``; the worker thread is not stopped and keeps running
    in the background until the regex call returns.

    NOTE(review): whether the caller actually regains control at the
    deadline depends on the interpreter scheduling the waiting thread
    while the regex engine is busy -- confirm on the target runtime.

    Example:
        executor = SafeRegexExecutor(timeout_seconds=1.0)

        result = executor.match(r"^[a-z]+$", "hello")
        # result is a Match object

        executor.match(r"(a+)+b", "a" * 30)
        # intended to raise TimeoutError once the limit is exceeded
    """

    def __init__(
        self,
        timeout_seconds: float = 1.0,
        max_input_length: int = 100_000,
    ):
        """Store the limits applied to every operation.

        Args:
            timeout_seconds: Maximum time to wait for an operation.
            max_input_length: Longest input string that is accepted.
        """
        self.max_input_length = max_input_length
        self.timeout_seconds = timeout_seconds

    def match(
        self,
        pattern: str | re.Pattern,
        string: str,
        flags: int = 0,
    ) -> re.Match | None:
        """Match ``pattern`` at the start of ``string`` under the time limit.

        Args:
            pattern: Regex pattern or compiled pattern.
            string: String to match.
            flags: Regex flags; used only when ``pattern`` is a string.

        Returns:
            Match object or None.

        Raises:
            TimeoutError: If matching exceeds the timeout.
            ValueError: If ``string`` is longer than ``max_input_length``.
        """
        regex = self._checked_compile(pattern, string, flags)
        return self._execute_with_timeout(regex.match, string)

    def search(
        self,
        pattern: str | re.Pattern,
        string: str,
        flags: int = 0,
    ) -> re.Match | None:
        """Search ``string`` for ``pattern`` under the time limit.

        Args:
            pattern: Regex pattern or compiled pattern.
            string: String to search.
            flags: Regex flags; used only when ``pattern`` is a string.

        Returns:
            Match object or None.

        Raises:
            TimeoutError: If the search exceeds the timeout.
            ValueError: If ``string`` is longer than ``max_input_length``.
        """
        regex = self._checked_compile(pattern, string, flags)
        return self._execute_with_timeout(regex.search, string)

    def findall(
        self,
        pattern: str | re.Pattern,
        string: str,
        flags: int = 0,
    ) -> list[Any]:
        """Collect all matches of ``pattern`` in ``string`` under the time limit.

        Args:
            pattern: Regex pattern or compiled pattern.
            string: String to search.
            flags: Regex flags; used only when ``pattern`` is a string.

        Returns:
            List of matches.

        Raises:
            TimeoutError: If the operation exceeds the timeout.
            ValueError: If ``string`` is longer than ``max_input_length``.
        """
        regex = self._checked_compile(pattern, string, flags)
        return self._execute_with_timeout(regex.findall, string)

    def _checked_compile(
        self,
        pattern: str | re.Pattern,
        string: str,
        flags: int,
    ) -> re.Pattern:
        """Enforce the input-length cap, then return a compiled pattern.

        The length check runs first so an oversized input is rejected
        before any compilation work is done.
        """
        size = len(string)
        if size > self.max_input_length:
            raise ValueError(
                f"Input too long ({size} > {self.max_input_length})"
            )

        # Already-compiled patterns are used as-is; ``flags`` is ignored
        # for them.
        return re.compile(pattern, flags) if isinstance(pattern, str) else pattern

    def _execute_with_timeout(
        self,
        func: Callable,
        *args: Any,
    ) -> Any:
        """Call ``func(*args)`` in a worker thread and wait for the result.

        Args:
            func: Function to execute.
            *args: Positional arguments for ``func``.

        Returns:
            Whatever ``func`` returned.

        Raises:
            TimeoutError: If ``func`` has not finished within
                ``timeout_seconds``.
            Exception: Any exception raised by ``func`` is re-raised in
                the calling thread.
        """
        outcome: dict[str, Any] = {}
        done = threading.Event()

        def worker() -> None:
            try:
                outcome["value"] = func(*args)
            except Exception as exc:
                outcome["error"] = exc
            finally:
                # Always wake the waiter, on success and failure alike.
                done.set()

        # Daemon thread: a runaway operation cannot keep the process alive.
        threading.Thread(target=worker, daemon=True).start()

        finished = done.wait(timeout=self.timeout_seconds)
        if not finished:
            raise TimeoutError(
                f"Regex operation timed out after {self.timeout_seconds}s"
            )

        error = outcome.get("error")
        if error:
            raise error

        return outcome.get("value")
756
+
757
+
758
+ # ============================================================================
759
+ # Module-level convenience functions
760
+ # ============================================================================
761
+
762
+
763
def check_regex_safety(
    pattern: str,
    config: SafeRegexConfig | None = None,
) -> tuple[bool, str | None]:
    """Run a one-off safety check on a regex pattern.

    Convenience wrapper that builds a ``RegexSafetyChecker`` with the
    given configuration and calls its ``check`` method.

    Args:
        pattern: Regex pattern to check.
        config: Optional safety configuration.

    Returns:
        Tuple of (is_safe, warning_message); the message is ``None`` for
        a pattern that passes.

    Example:
        is_safe, warning = check_regex_safety(r"(a+)+b")
        if not is_safe:
            print(warning)
    """
    return RegexSafetyChecker(config).check(pattern)
782
+
783
+
784
def analyze_regex_complexity(
    pattern: str,
    config: SafeRegexConfig | None = None,
) -> RegexAnalysisResult:
    """Run a one-off complexity analysis on a regex pattern.

    Convenience wrapper that builds a ``RegexComplexityAnalyzer`` with
    the given configuration and calls its ``analyze`` method.

    Args:
        pattern: Regex pattern to analyze.
        config: Optional safety configuration; passed to the analyzer
            unchanged, including when it is ``None``.

    Returns:
        The ``RegexAnalysisResult`` produced by the analyzer.

    Example:
        result = analyze_regex_complexity(r"^[a-z]+$")
        print(result.risk_level)
        print(result.complexity_score)
    """
    return RegexComplexityAnalyzer(config).analyze(pattern)
804
+
805
+
806
def create_safe_regex(
    pattern: str,
    flags: int = 0,
    config: SafeRegexConfig | None = None,
) -> re.Pattern:
    """Compile a regex pattern, but only after it passes safety validation.

    Convenience wrapper around
    ``RegexSafetyChecker(config).validate_and_compile``.

    Args:
        pattern: Regex pattern to compile.
        flags: Regex flags.
        config: Optional safety configuration.

    Returns:
        Compiled regex pattern.

    Raises:
        ValueError: If pattern is unsafe or invalid.

    Example:
        try:
            compiled = create_safe_regex(r"^[a-z]+$")
        except ValueError as e:
            print(f"Unsafe pattern: {e}")
    """
    return RegexSafetyChecker(config).validate_and_compile(pattern, flags)
833
+
834
+
835
def safe_match(
    pattern: str,
    string: str,
    timeout: float = 1.0,
    flags: int = 0,
) -> re.Match | None:
    """Match a pattern at the start of a string with timeout protection.

    Convenience wrapper that creates a ``SafeRegexExecutor`` with the
    given timeout (and its default input-length limit) and calls its
    ``match`` method.

    Args:
        pattern: Regex pattern.
        string: String to match.
        timeout: Maximum execution time in seconds.
        flags: Regex flags.

    Returns:
        Match object or None.

    Raises:
        TimeoutError: If matching exceeds timeout.
        ValueError: If input is too long.

    Example:
        result = safe_match(r"^[a-z]+$", "hello", timeout=0.5)
        if result:
            print("Matched!")
    """
    return SafeRegexExecutor(timeout_seconds=timeout).match(pattern, string, flags)
863
+
864
+
865
def safe_search(
    pattern: str,
    string: str,
    timeout: float = 1.0,
    flags: int = 0,
) -> re.Match | None:
    """Search a string for a pattern with timeout protection.

    Convenience wrapper that creates a ``SafeRegexExecutor`` with the
    given timeout (and its default input-length limit) and calls its
    ``search`` method.

    Args:
        pattern: Regex pattern.
        string: String to search.
        timeout: Maximum execution time in seconds.
        flags: Regex flags.

    Returns:
        Match object or None.

    Raises:
        TimeoutError: If search exceeds timeout.
        ValueError: If input is too long.
    """
    return SafeRegexExecutor(timeout_seconds=timeout).search(pattern, string, flags)