truthound-1.0.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877)
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,1151 @@
1
+ """YAML validation and error handling for custom patterns.
2
+
3
+ This module provides comprehensive YAML validation with detailed,
4
+ user-friendly error messages for pattern configuration files.
5
+
6
+ Key Features:
7
+ - JSON Schema-based validation
8
+ - Detailed error messages with line numbers
9
+ - Suggestions for common mistakes
10
+ - Path-based error reporting
11
+ - YAML syntax error handling
12
+
13
+ Example:
14
+ from truthound.profiler.yaml_validation import (
15
+ YAMLValidator,
16
+ YAMLValidationException,
17
+ validate_pattern_yaml,
18
+ )
19
+
20
+ # Validate YAML content
21
+ try:
22
+ result = validate_pattern_yaml(yaml_content)
23
+ except YAMLValidationException as e:
24
+ print(e.format_errors())
25
+
26
+ # With file path for better error messages
27
+ result = validate_pattern_yaml(yaml_content, source_path="patterns.yaml")
28
+ """
29
+
30
+ from __future__ import annotations
31
+
32
+ import json
33
+ import re
34
+ from dataclasses import dataclass, field
35
+ from enum import Enum
36
+ from pathlib import Path
37
+ from typing import Any
38
+
39
+
40
+ # =============================================================================
41
+ # Error Types
42
+ # =============================================================================
43
+
44
+
45
class ErrorSeverity(str, Enum):
    """Severity levels for validation errors.

    The enum mixes in ``str``, so every member is also a string whose
    value is the lowercase name used when errors are serialized
    (see ``ValidationError.to_dict``).
    """

    ERROR = "error"  # Must be fixed
    WARNING = "warning"  # Should be fixed
    INFO = "info"  # Informational
51
+
52
+
53
class ErrorCode(str, Enum):
    """Error codes for categorization.

    Each value is a short identifier made of a category prefix
    (``YAML``, ``SCHEMA``, ``PATTERN``, ``FILE``) and a three-digit number.
    The value is what ``ValidationError.format_error`` prints in brackets.
    """

    # YAML Syntax: problems found while parsing the document itself
    YAML_SYNTAX = "YAML001"
    YAML_ENCODING = "YAML002"
    YAML_STRUCTURE = "YAML003"

    # Schema: parsed data does not conform to the expected schema
    MISSING_REQUIRED = "SCHEMA001"
    INVALID_TYPE = "SCHEMA002"
    INVALID_VALUE = "SCHEMA003"
    UNKNOWN_FIELD = "SCHEMA004"
    CONSTRAINT_VIOLATION = "SCHEMA005"

    # Pattern Specific: problems with individual pattern definitions
    INVALID_REGEX = "PATTERN001"
    REGEX_COMPLEXITY = "PATTERN002"
    EXAMPLE_MISMATCH = "PATTERN003"
    DUPLICATE_ID = "PATTERN004"
    CIRCULAR_EXTENDS = "PATTERN005"

    # General: file access problems
    FILE_NOT_FOUND = "FILE001"
    PERMISSION_DENIED = "FILE002"
    ENCODING_ERROR = "FILE003"
79
+
80
+
81
@dataclass
class SourceLocation:
    """Position of an issue inside a source document.

    Attributes:
        line: Line number (1-based); a falsy value is treated as "not set"
        column: Column number (1-based); a falsy value is treated as "not set"
        path: JSON path to the error location
    """

    line: int = 0
    column: int = 0
    path: str = ""

    def __str__(self) -> str:
        """Describe the location, preferring line/column over the JSON path."""
        if self.line:
            described = f"line {self.line}"
            # The column is only meaningful alongside a line number.
            if self.column:
                described += f", column {self.column}"
            return described
        if self.path:
            return f"at {self.path}"
        return "unknown location"
103
+
104
+
105
@dataclass
class ValidationError:
    """A single validation finding with everything needed to report it.

    Note that this is a plain dataclass, not an exception type; a list of
    these is carried by ``YAMLValidationException``.

    Attributes:
        code: Error code for categorization
        message: Human-readable error message
        severity: Error severity
        location: Source location
        context: Contextual information
        suggestion: Suggested fix
        source_snippet: Relevant source code snippet
    """

    code: ErrorCode
    message: str
    severity: ErrorSeverity = ErrorSeverity.ERROR
    location: SourceLocation = field(default_factory=SourceLocation)
    context: dict[str, Any] = field(default_factory=dict)
    suggestion: str = ""
    source_snippet: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Return a plain-dict view of the error.

        ``source_snippet`` is not part of the returned dictionary.
        """
        where = self.location
        location_info = {
            "line": where.line,
            "column": where.column,
            "path": where.path,
        }
        return {
            "code": self.code.value,
            "message": self.message,
            "severity": self.severity.value,
            "location": location_info,
            "context": self.context,
            "suggestion": self.suggestion,
        }

    def format_error(self, source_path: str = "") -> str:
        """Render the error as a multi-line, human-readable report.

        Args:
            source_path: Path to source file for context

        Returns:
            Formatted error message
        """
        symbols = {
            ErrorSeverity.ERROR: "✗",
            ErrorSeverity.WARNING: "⚠",
            ErrorSeverity.INFO: "ℹ",
        }
        # Header: severity symbol, bracketed code, then the message.
        out = [f"{symbols[self.severity]} [{self.code.value}] {self.message}"]

        # Location: prefer "file:position"; fall back to the JSON path only
        # when no file path was supplied.
        if source_path:
            position = str(self.location) if self.location.line else ""
            target = f"{source_path}:{position}" if position else source_path
            out.append(f" --> {target}")
        elif self.location.path:
            out.append(f" --> at: {self.location.path}")

        # Source snippet, numbered from the error line when it is known.
        if self.source_snippet:
            out.append("")
            first_line = self.location.line
            for offset, text in enumerate(self.source_snippet.split("\n")):
                gutter = f" {first_line + offset} |" if first_line else " |"
                out.append(f"{gutter} {text}")

            # Caret under the error column, emitted after the whole snippet.
            if self.location.column:
                out.append(" " * (self.location.column + 5) + "^")

        if self.suggestion:
            out.append("")
            out.append(f" 💡 Suggestion: {self.suggestion}")

        return "\n".join(out)
194
+
195
+
196
class YAMLValidationException(Exception):
    """Exception carrying one or more YAML validation errors.

    The exception message is a short summary such as
    ``"2 error(s), 1 warning(s)"``; the warning part is omitted when there
    are no warnings.

    Attributes:
        errors: List of validation errors
        source_path: Path to source file
    """

    def __init__(
        self,
        errors: list[ValidationError],
        source_path: str = "",
    ):
        self.errors = errors
        self.source_path = source_path

        # Build the summary used as the exception message.
        n_errors = len([e for e in errors if e.severity == ErrorSeverity.ERROR])
        n_warnings = len([e for e in errors if e.severity == ErrorSeverity.WARNING])

        summary = f"{n_errors} error(s)"
        if n_warnings:
            summary = f"{summary}, {n_warnings} warning(s)"

        super().__init__(summary)

    def format_errors(self) -> str:
        """Format all errors for display, followed by a totals line."""
        rendered = []
        for err in self.errors:
            # Each formatted error is followed by a blank separator line.
            rendered += [err.format_error(self.source_path), ""]

        n_errors = len([e for e in self.errors if e.severity == ErrorSeverity.ERROR])
        n_warnings = len(
            [e for e in self.errors if e.severity == ErrorSeverity.WARNING]
        )
        rendered.append(f"Found {n_errors} error(s) and {n_warnings} warning(s)")

        return "\n".join(rendered)
237
+
238
+
239
+ # =============================================================================
240
+ # Schema Definition
241
+ # =============================================================================
242
+
243
+
244
# JSON-Schema-style description of a pattern configuration document.
# The top level is an object; none of its properties is listed as required.
PATTERN_SCHEMA = {
    "type": "object",
    "properties": {
        # Schema version string of the form "<digits>.<digits>".
        "version": {
            "type": "string",
            "pattern": r"^\d+\.\d+$",
            "description": "Schema version (e.g., '1.0')",
        },
        "name": {
            "type": "string",
            "minLength": 1,
            "maxLength": 100,
            "description": "Configuration name",
        },
        "description": {
            "type": "string",
            "maxLength": 1000,
            "description": "Configuration description",
        },
        "extends": {
            "type": "array",
            "items": {"type": "string"},
            "description": "Parent configurations to inherit from",
        },
        # Mapping of arbitrary keys to pattern definitions; the schema for
        # every value is given by "additionalProperties".
        "patterns": {
            "type": "object",
            "additionalProperties": {
                "type": "object",
                # "regex" is the only mandatory field of a pattern definition.
                "required": ["regex"],
                "properties": {
                    "name": {"type": "string"},
                    "regex": {"type": "string"},
                    "priority": {"type": "integer", "minimum": 0, "maximum": 100},
                    "data_type": {"type": "string"},
                    "min_match_ratio": {
                        "type": "number",
                        "minimum": 0,
                        "maximum": 1,
                    },
                    "description": {"type": "string"},
                    # Each example is either a bare string or an object
                    # that must carry a "value".
                    "examples": {
                        "type": "array",
                        "items": {
                            "oneOf": [
                                {"type": "string"},
                                {
                                    "type": "object",
                                    "required": ["value"],
                                    "properties": {
                                        "value": {"type": "string"},
                                        "should_match": {"type": "boolean"},
                                        "description": {"type": "string"},
                                    },
                                },
                            ],
                        },
                    },
                    "tags": {
                        "type": "array",
                        "items": {"type": "string"},
                    },
                    "enabled": {"type": "boolean"},
                    "case_sensitive": {"type": "boolean"},
                    "multiline": {"type": "boolean"},
                },
            },
            "description": "Pattern definitions",
        },
        # Mapping of arbitrary keys to group definitions. A group's own
        # "patterns" entry is only constrained to be an object here.
        "groups": {
            "type": "object",
            "additionalProperties": {
                "type": "object",
                "properties": {
                    "name": {"type": "string"},
                    "description": {"type": "string"},
                    "enabled": {"type": "boolean"},
                    "priority_boost": {"type": "integer"},
                    "patterns": {"type": "object"},
                },
            },
            "description": "Pattern groups",
        },
        # Free-form object; no inner structure is specified.
        "metadata": {
            "type": "object",
            "description": "Additional metadata",
        },
    },
}
332
+
333
+
334
+ # =============================================================================
335
+ # YAML Parser with Error Tracking
336
+ # =============================================================================
337
+
338
+
339
class YAMLParseResult:
    """Outcome of parsing a YAML document, including collected errors.

    Attributes:
        data: Parsed data (``None`` until parsing succeeds)
        errors: List of parse errors
        line_mapping: Mapping of JSON paths to line numbers
    """

    def __init__(self):
        self.data: dict[str, Any] | None = None
        self.errors: list[ValidationError] = []
        self.line_mapping: dict[str, int] = {}

    @property
    def success(self) -> bool:
        """True when data was produced and no ERROR-severity entry exists."""
        if self.data is None:
            return False
        # Warnings and informational entries do not count as failure.
        return all(e.severity != ErrorSeverity.ERROR for e in self.errors)
359
+
360
+
361
def parse_yaml_with_locations(content: str) -> YAMLParseResult:
    """Parse YAML content and track locations.

    ruamel.yaml is tried first because it exposes line information on the
    parsed containers; PyYAML (``safe_load``) is the fallback, in which case
    line numbers are only estimated by searching the text for keys.  Either
    parser alone is sufficient: PyYAML is not required when ruamel.yaml is
    installed.

    Syntax errors are not raised; they are converted to ``ValidationError``
    entries on the returned result.

    Args:
        content: YAML content string

    Returns:
        YAMLParseResult with data and location mapping
    """
    result = YAMLParseResult()

    # Preferred parser: ruamel.yaml.
    try:
        from ruamel.yaml import YAML
        from ruamel.yaml.error import YAMLError
    except ImportError:
        pass
    else:
        yaml_parser = YAML()
        yaml_parser.preserve_quotes = True

        try:
            data = yaml_parser.load(content)
        except YAMLError as e:
            result.errors.append(_convert_ruamel_error(e))
        else:
            if isinstance(data, dict):
                result.data = dict(data)
            else:
                # A non-mapping top level (list, scalar) cannot be turned
                # into a dict; pass it through as the PyYAML branch does so
                # schema validation can report the wrong type instead of
                # this function crashing. Empty documents become {}.
                result.data = data or {}
            _extract_line_mapping(data, result.line_mapping)
        return result

    # Fallback parser: PyYAML.
    try:
        import yaml
    except ImportError:
        result.errors.append(ValidationError(
            code=ErrorCode.YAML_SYNTAX,
            message="No YAML parser available. Install PyYAML: pip install pyyaml",
            severity=ErrorSeverity.ERROR,
            suggestion="Run: pip install pyyaml",
        ))
        return result

    try:
        result.data = yaml.safe_load(content) or {}
        _estimate_line_mapping(content, result.data, result.line_mapping)
    except yaml.YAMLError as e:
        result.errors.append(_convert_pyyaml_error(e))

    return result
407
+
408
+
409
+ def _extract_line_mapping(
410
+ data: Any,
411
+ mapping: dict[str, int],
412
+ path: str = "",
413
+ ) -> None:
414
+ """Extract line mapping from ruamel.yaml data."""
415
+ if hasattr(data, "lc"):
416
+ line = getattr(data.lc, "line", 0)
417
+ if line:
418
+ mapping[path] = line + 1 # Convert to 1-based
419
+
420
+ if isinstance(data, dict):
421
+ for key, value in data.items():
422
+ new_path = f"{path}.{key}" if path else key
423
+ _extract_line_mapping(value, mapping, new_path)
424
+ elif isinstance(data, list):
425
+ for i, item in enumerate(data):
426
+ new_path = f"{path}[{i}]"
427
+ _extract_line_mapping(item, mapping, new_path)
428
+
429
+
430
+ def _estimate_line_mapping(
431
+ content: str,
432
+ data: Any,
433
+ mapping: dict[str, int],
434
+ path: str = "",
435
+ ) -> None:
436
+ """Estimate line mapping by searching for keys in content."""
437
+ if isinstance(data, dict):
438
+ for key, value in data.items():
439
+ new_path = f"{path}.{key}" if path else key
440
+
441
+ # Search for key in content
442
+ pattern = rf'^\s*{re.escape(key)}\s*:'
443
+ for i, line in enumerate(content.split('\n'), 1):
444
+ if re.match(pattern, line):
445
+ mapping[new_path] = i
446
+ break
447
+
448
+ _estimate_line_mapping(content, value, mapping, new_path)
449
+
450
+
451
def _convert_pyyaml_error(error: Any) -> ValidationError:
    """Convert PyYAML error to ValidationError.

    Args:
        error: Exception raised by PyYAML. When it carries a
            ``problem_mark``, its 0-based line/column are converted to
            1-based; otherwise both are reported as 0.

    Returns:
        A ``YAML_SYNTAX`` error of ERROR severity. A suggestion is attached
        when the message matches one of a few known parser messages.
    """
    mark = getattr(error, "problem_mark", None)
    line = mark.line + 1 if mark else 0
    column = mark.column + 1 if mark else 0

    # ``problem`` may exist but be None; fall back to the full error text
    # rather than emitting the literal string "None".
    problem = getattr(error, "problem", None)
    message = str(problem) if problem else str(error)

    # Create helpful suggestions based on common errors
    suggestion = ""
    lowered = message.lower()
    if "could not find expected ':'" in message:
        suggestion = "Check for missing colons after keys"
    elif "found character" in message and "cannot start" in message:
        suggestion = "Special characters may need to be quoted"
    elif "expected" in lowered and "block end" in lowered:
        suggestion = "Check indentation - YAML uses spaces, not tabs"

    return ValidationError(
        code=ErrorCode.YAML_SYNTAX,
        message=message,
        severity=ErrorSeverity.ERROR,
        location=SourceLocation(line=line, column=column),
        suggestion=suggestion,
    )
480
+
481
+
482
def _convert_ruamel_error(error: Any) -> ValidationError:
    """Translate a ruamel.yaml error into a ``ValidationError``.

    The full string form of the error is used as the message. Line and
    column come from ``error.problem_mark`` (converted from 0-based to
    1-based) when present, and are 0 otherwise.
    """
    mark = getattr(error, "problem_mark", None)
    if mark:
        where = SourceLocation(line=mark.line + 1, column=mark.column + 1)
    else:
        where = SourceLocation(line=0, column=0)

    return ValidationError(
        code=ErrorCode.YAML_SYNTAX,
        message=str(error),
        severity=ErrorSeverity.ERROR,
        location=where,
    )
497
+
498
+
499
+ # =============================================================================
500
+ # Schema Validator
501
+ # =============================================================================
502
+
503
+
504
class SchemaValidator:
    """Validate parsed data against a JSON-Schema-style dictionary.

    Only the keywords read by this class are enforced: ``type``,
    ``properties``, ``required``, ``additionalProperties``, ``items``
    (optionally with ``oneOf``), ``minLength``, ``maxLength``, ``pattern``,
    ``minimum`` and ``maximum``. Each violation is reported as a
    ``ValidationError`` carrying the dotted path of the offending value and,
    when available in ``line_mapping``, its source line.
    """

    # JSON Schema type name -> Python type(s) accepted for it.
    _TYPE_MAPPING = {
        "string": str,
        "integer": int,
        "number": (int, float),
        "boolean": bool,
        "array": list,
        "object": dict,
        "null": type(None),
    }

    def __init__(self, schema: dict[str, Any]):
        """Initialize validator.

        Args:
            schema: JSON Schema dictionary
        """
        self.schema = schema

    def validate(
        self,
        data: dict[str, Any],
        line_mapping: dict[str, int] | None = None,
    ) -> list[ValidationError]:
        """Validate data against the schema given at construction.

        Args:
            data: Data to validate
            line_mapping: Optional mapping of dotted paths to line numbers

        Returns:
            List of validation errors (empty when the data conforms)
        """
        errors: list[ValidationError] = []
        self._validate_object(
            data,
            self.schema,
            path="",
            errors=errors,
            line_mapping=line_mapping or {},
        )
        return errors

    @staticmethod
    def _locate(path: str, line_mapping: dict[str, int]) -> SourceLocation:
        """Build the location for ``path``; line 0 means "unknown"."""
        return SourceLocation(line=line_mapping.get(path, 0), path=path)

    def _validate_object(
        self,
        data: Any,
        schema: dict[str, Any],
        path: str,
        errors: list[ValidationError],
        line_mapping: dict[str, int],
    ) -> None:
        """Validate one value against one (sub)schema.

        A type mismatch is reported once and stops further checks on this
        value; otherwise the constraint checker matching the value's type is
        invoked. Errors are appended to ``errors`` in place.
        """
        schema_type = schema.get("type", "any")

        if not self._check_type(data, schema_type):
            # A list of allowed types is rendered as "a | b" for readability.
            if isinstance(schema_type, (list, tuple)):
                expected_label = " | ".join(str(t) for t in schema_type)
            else:
                expected_label = schema_type
            actual = type(data).__name__
            errors.append(ValidationError(
                code=ErrorCode.INVALID_TYPE,
                message=f"Expected type '{expected_label}', got '{actual}'",
                severity=ErrorSeverity.ERROR,
                location=self._locate(path, line_mapping),
                context={"expected": schema_type, "actual": actual},
                suggestion=self._get_type_suggestion(schema_type, data),
            ))
            return

        # ``type`` may be a single name or a list of names.
        if isinstance(schema_type, (list, tuple)):
            declared = tuple(schema_type)
        else:
            declared = (schema_type,)

        if "object" in declared and isinstance(data, dict):
            self._validate_object_properties(
                data, schema, path, errors, line_mapping
            )
        elif "array" in declared and isinstance(data, list):
            self._validate_array_items(
                data, schema, path, errors, line_mapping
            )
        elif "string" in declared and isinstance(data, str):
            self._validate_string(data, schema, path, errors, line_mapping)
        elif (
            ("number" in declared or "integer" in declared)
            and isinstance(data, (int, float))
            and not isinstance(data, bool)
        ):
            self._validate_number(data, schema, path, errors, line_mapping)

    def _validate_object_properties(
        self,
        data: dict[str, Any],
        schema: dict[str, Any],
        path: str,
        errors: list[ValidationError],
        line_mapping: dict[str, int],
    ) -> None:
        """Check ``required``, ``properties`` and ``additionalProperties``."""
        properties = schema.get("properties", {})
        required = schema.get("required", [])
        additional_props = schema.get("additionalProperties", True)

        # Missing required keys are reported at the parent object's path.
        for req in required:
            if req not in data:
                errors.append(ValidationError(
                    code=ErrorCode.MISSING_REQUIRED,
                    message=f"Missing required field: '{req}'",
                    severity=ErrorSeverity.ERROR,
                    location=self._locate(path, line_mapping),
                    context={"field": req},
                    suggestion=f"Add the required field '{req}'",
                ))

        for key, value in data.items():
            prop_path = f"{path}.{key}" if path else key

            if key in properties:
                self._validate_object(
                    value, properties[key], prop_path, errors, line_mapping
                )
            elif isinstance(additional_props, dict):
                # ``additionalProperties`` given as a schema: apply it to
                # every key not listed under ``properties``.
                self._validate_object(
                    value, additional_props, prop_path, errors, line_mapping
                )
            elif not additional_props:
                # Unknown keys are only a warning, not a hard failure.
                errors.append(ValidationError(
                    code=ErrorCode.UNKNOWN_FIELD,
                    message=f"Unknown field: '{key}'",
                    severity=ErrorSeverity.WARNING,
                    location=self._locate(prop_path, line_mapping),
                    suggestion="Remove unknown field or check spelling",
                ))

    def _validate_array_items(
        self,
        data: list[Any],
        schema: dict[str, Any],
        path: str,
        errors: list[ValidationError],
        line_mapping: dict[str, int],
    ) -> None:
        """Validate each element of a list against the ``items`` schema."""
        items_schema = schema.get("items")
        if not items_schema:
            return

        for i, item in enumerate(data):
            item_path = f"{path}[{i}]"

            if "oneOf" not in items_schema:
                self._validate_object(
                    item, items_schema, item_path, errors, line_mapping
                )
                continue

            # oneOf: the item is accepted as soon as one alternative yields
            # no errors; errors from rejected alternatives are discarded.
            matched = False
            for option in items_schema["oneOf"]:
                test_errors: list[ValidationError] = []
                self._validate_object(
                    item, option, item_path, test_errors, line_mapping
                )
                if not test_errors:
                    matched = True
                    break

            if not matched:
                errors.append(ValidationError(
                    code=ErrorCode.INVALID_VALUE,
                    message="Value doesn't match any allowed format",
                    severity=ErrorSeverity.ERROR,
                    location=self._locate(item_path, line_mapping),
                ))

    def _validate_string(
        self,
        data: str,
        schema: dict[str, Any],
        path: str,
        errors: list[ValidationError],
        line_mapping: dict[str, int],
    ) -> None:
        """Check ``minLength``, ``maxLength`` and ``pattern`` on a string."""
        min_length = schema.get("minLength", 0)
        max_length = schema.get("maxLength", float("inf"))
        pattern = schema.get("pattern")

        if len(data) < min_length:
            errors.append(ValidationError(
                code=ErrorCode.CONSTRAINT_VIOLATION,
                message=f"String too short (min: {min_length})",
                severity=ErrorSeverity.ERROR,
                location=self._locate(path, line_mapping),
            ))

        if len(data) > max_length:
            errors.append(ValidationError(
                code=ErrorCode.CONSTRAINT_VIOLATION,
                message=f"String too long (max: {max_length})",
                severity=ErrorSeverity.ERROR,
                location=self._locate(path, line_mapping),
            ))

        # NOTE(review): re.match anchors at the start of the string, whereas
        # JSON Schema's ``pattern`` is unanchored. Kept as-is so existing
        # schemas behave the same; confirm before switching to re.search.
        if pattern and not re.match(pattern, data):
            errors.append(ValidationError(
                code=ErrorCode.CONSTRAINT_VIOLATION,
                message=f"String doesn't match pattern: {pattern}",
                severity=ErrorSeverity.ERROR,
                location=self._locate(path, line_mapping),
            ))

    def _validate_number(
        self,
        data: int | float,
        schema: dict[str, Any],
        path: str,
        errors: list[ValidationError],
        line_mapping: dict[str, int],
    ) -> None:
        """Check inclusive ``minimum`` / ``maximum`` bounds on a number."""
        minimum = schema.get("minimum")
        maximum = schema.get("maximum")

        if minimum is not None and data < minimum:
            errors.append(ValidationError(
                code=ErrorCode.CONSTRAINT_VIOLATION,
                message=f"Value {data} is below minimum {minimum}",
                severity=ErrorSeverity.ERROR,
                location=self._locate(path, line_mapping),
            ))

        if maximum is not None and data > maximum:
            errors.append(ValidationError(
                code=ErrorCode.CONSTRAINT_VIOLATION,
                message=f"Value {data} exceeds maximum {maximum}",
                severity=ErrorSeverity.ERROR,
                location=self._locate(path, line_mapping),
            ))

    def _check_type(self, value: Any, expected_type: str | list[str]) -> bool:
        """Return True when ``value`` satisfies the schema ``type`` keyword.

        Args:
            value: Value to test.
            expected_type: A single type name, or a list of names of which
                any one may match. ``"any"`` and unrecognised names accept
                every value.
        """
        if isinstance(expected_type, (list, tuple)):
            return any(
                self._check_type(value, option) for option in expected_type
            )

        if expected_type == "any":
            return True

        expected = self._TYPE_MAPPING.get(expected_type)
        if expected is None:
            return True

        # bool is a subclass of int in Python, so True/False would otherwise
        # pass as "integer" or "number".
        if isinstance(value, bool) and expected_type in ("integer", "number"):
            return False

        return isinstance(value, expected)

    def _get_type_suggestion(
        self, expected: str | list[str], actual: Any
    ) -> str:
        """Return a fix hint for a type mismatch, or "" when none applies.

        For a list of allowed types the hint for the first type that has one
        is returned. ``actual`` is accepted for interface compatibility and
        is not consulted.
        """
        suggestions = {
            "string": "Wrap value in quotes",
            "integer": "Remove quotes or decimal point",
            "number": "Use a numeric value",
            "boolean": "Use 'true' or 'false' (lowercase)",
            "array": "Use YAML list syntax (- item)",
            "object": "Use YAML object syntax (key: value)",
        }
        if isinstance(expected, (list, tuple)):
            candidates = tuple(expected)
        else:
            candidates = (expected,)

        for name in candidates:
            hint = suggestions.get(name, "")
            if hint:
                return hint
        return ""
807
+
808
+
809
+ # =============================================================================
810
+ # Pattern-Specific Validation
811
+ # =============================================================================
812
+
813
+
814
class PatternValidator:
    """Apply pattern-specific checks on a parsed pattern configuration.

    Checks performed: each pattern's ``regex`` compiles, simple heuristics
    for over-broad or backtracking-prone regexes, ``examples`` agree with the
    regex, and pattern IDs are not reused across top-level patterns and
    groups. Structurally malformed sections (for example ``patterns: null``)
    are skipped silently here rather than raising; reporting the wrong shape
    is left to schema validation.
    """

    def validate(
        self,
        data: dict[str, Any],
        line_mapping: dict[str, int] | None = None,
    ) -> list[ValidationError]:
        """Validate pattern configuration.

        Args:
            data: Parsed pattern configuration
            line_mapping: Mapping of dotted paths to line numbers

        Returns:
            List of validation errors
        """
        errors: list[ValidationError] = []
        line_mapping = line_mapping or {}
        seen_ids: set[str] = set()

        # Nothing to inspect unless the document root is a mapping.
        if not isinstance(data, dict):
            return errors

        # Top-level patterns
        patterns = data.get("patterns")
        if isinstance(patterns, dict):
            for pattern_id, pattern in patterns.items():
                path = f"patterns.{pattern_id}"
                errors.extend(self._validate_pattern(
                    pattern_id, pattern, path, line_mapping
                ))

                # Mapping keys are unique, so this only matters if the
                # loader ever yields repeated keys; kept for parity.
                if pattern_id in seen_ids:
                    errors.append(ValidationError(
                        code=ErrorCode.DUPLICATE_ID,
                        message=f"Duplicate pattern ID: '{pattern_id}'",
                        severity=ErrorSeverity.ERROR,
                        location=SourceLocation(
                            line=line_mapping.get(path, 0),
                            path=path,
                        ),
                    ))
                seen_ids.add(pattern_id)

        # Patterns nested inside groups
        groups = data.get("groups")
        if isinstance(groups, dict):
            for group_id, group in groups.items():
                group_path = f"groups.{group_id}"

                group_patterns = (
                    group.get("patterns") if isinstance(group, dict) else None
                )
                if not isinstance(group_patterns, dict):
                    continue

                for pattern_id, pattern in group_patterns.items():
                    pattern_path = f"{group_path}.patterns.{pattern_id}"
                    errors.extend(self._validate_pattern(
                        pattern_id, pattern, pattern_path, line_mapping
                    ))

                    # An ID reused across groups is only a warning.
                    if pattern_id in seen_ids:
                        errors.append(ValidationError(
                            code=ErrorCode.DUPLICATE_ID,
                            message=f"Duplicate pattern ID: '{pattern_id}'",
                            severity=ErrorSeverity.WARNING,
                            location=SourceLocation(
                                line=line_mapping.get(pattern_path, 0),
                                path=pattern_path,
                            ),
                            suggestion="Pattern IDs should be unique across all groups",
                        ))
                    seen_ids.add(pattern_id)

        # Check for circular extends
        extends = data.get("extends", [])
        if extends:
            errors.extend(self._check_circular_extends(extends, line_mapping))

        return errors

    def _validate_pattern(
        self,
        pattern_id: str,
        pattern: dict[str, Any],
        path: str,
        line_mapping: dict[str, int],
    ) -> list[ValidationError]:
        """Validate one pattern entry: its regex, then its examples.

        Examples are checked whenever the regex produced no ERROR-severity
        finding, so a regex that only triggered warnings still has its
        examples verified.
        """
        errors: list[ValidationError] = []

        if not isinstance(pattern, dict):
            return errors

        regex = pattern.get("regex", "")
        if regex:
            regex_errors = self._validate_regex(regex, path, line_mapping)
            errors.extend(regex_errors)

            regex_is_broken = any(
                e.severity == ErrorSeverity.ERROR for e in regex_errors
            )
            if not regex_is_broken:
                errors.extend(self._validate_examples(
                    regex, pattern.get("examples", []), path, line_mapping
                ))

        return errors

    def _validate_regex(
        self,
        regex: str,
        path: str,
        line_mapping: dict[str, int],
    ) -> list[ValidationError]:
        """Check that ``regex`` compiles and flag risky constructs.

        Returns one ERROR when compilation fails; otherwise zero or more
        WARNINGs from the heuristics below. Non-string input yields no
        findings.
        """
        errors: list[ValidationError] = []

        if not isinstance(regex, str):
            return errors

        regex_path = f"{path}.regex"
        location_line = line_mapping.get(regex_path, 0)

        try:
            re.compile(regex)
        except re.error as e:
            errors.append(ValidationError(
                code=ErrorCode.INVALID_REGEX,
                message=f"Invalid regular expression: {e}",
                severity=ErrorSeverity.ERROR,
                location=SourceLocation(line=location_line, path=regex_path),
                context={"regex": regex, "error": str(e)},
                suggestion=self._get_regex_suggestion(str(e)),
            ))
            return errors

        # Heuristic: leading and trailing '.*' match almost anything.
        if regex.startswith(".*") and regex.endswith(".*"):
            errors.append(ValidationError(
                code=ErrorCode.REGEX_COMPLEXITY,
                message="Pattern starts and ends with '.*' - may match unintended strings",
                severity=ErrorSeverity.WARNING,
                location=SourceLocation(line=location_line, path=regex_path),
                suggestion="Consider using anchors (^ and $) for more precise matching",
            ))

        # Heuristic: a quantified group that itself contains the same
        # quantifier, e.g. (a+)+ or (a*)*.
        if re.search(r"\(.*\+.*\)\+|\(.*\*.*\)\*", regex):
            errors.append(ValidationError(
                code=ErrorCode.REGEX_COMPLEXITY,
                message="Pattern may cause catastrophic backtracking",
                severity=ErrorSeverity.WARNING,
                location=SourceLocation(line=location_line, path=regex_path),
                suggestion="Avoid nested quantifiers like (a+)+",
            ))

        return errors

    def _validate_examples(
        self,
        regex: str,
        examples: list[Any],
        path: str,
        line_mapping: dict[str, int],
    ) -> list[ValidationError]:
        """Check that each example matches (or does not match) ``regex``.

        An example is either a plain string, which must match, or a mapping
        with ``value`` and optional ``should_match`` (default True).
        Matching uses ``re.match``, i.e. anchored at the start of the value.
        Entries of any other shape, and non-string values, are skipped.
        """
        errors: list[ValidationError] = []

        if not isinstance(regex, str) or not isinstance(examples, (list, tuple)):
            return errors

        try:
            compiled = re.compile(regex)
        except re.error:
            return errors  # Regex error already reported

        for i, example in enumerate(examples):
            example_path = f"{path}.examples[{i}]"

            if isinstance(example, str):
                value = example
                should_match = True
            elif isinstance(example, dict):
                value = example.get("value", "")
                should_match = example.get("should_match", True)
            else:
                continue

            # A non-string value cannot be matched by a str pattern.
            if not isinstance(value, str):
                continue

            actual_match = compiled.match(value) is not None
            if actual_match == should_match:
                continue

            expected = "match" if should_match else "not match"
            actual = "matches" if actual_match else "doesn't match"

            errors.append(ValidationError(
                code=ErrorCode.EXAMPLE_MISMATCH,
                message=f"Example '{value}' should {expected} but {actual}",
                severity=ErrorSeverity.ERROR,
                location=SourceLocation(
                    line=line_mapping.get(example_path, 0),
                    path=example_path,
                ),
                context={
                    "value": value,
                    "should_match": should_match,
                    "actual_match": actual_match,
                },
            ))

        return errors

    def _check_circular_extends(
        self,
        extends: list[str],
        line_mapping: dict[str, int],
    ) -> list[ValidationError]:
        """Placeholder for circular ``extends`` detection.

        Always returns an empty list: real detection would require loading
        the referenced files, which this method does not do.
        """
        return []

    def _get_regex_suggestion(self, error_message: str) -> str:
        """Map a regex compile error message to a human-readable hint.

        The first keyword found (case-insensitively) in ``error_message``
        wins; a generic hint is returned when none matches.
        """
        suggestions = {
            "unterminated": "Check for missing closing brackets, parentheses, or quotes",
            "unbalanced": "Count opening and closing brackets/parentheses",
            "nothing to repeat": "Quantifiers (+, *, ?) need something to repeat",
            "bad escape": "Use double backslash (\\\\) or raw string (r'...')",
            "unknown group": "Check group reference syntax: (?P<name>...) or (?:...)",
        }

        lowered = error_message.lower()
        for keyword, suggestion in suggestions.items():
            if keyword in lowered:
                return suggestion

        return "Check regex syntax at https://regex101.com"
1041
+
1042
+
1043
+ # =============================================================================
1044
+ # Main Validation Function
1045
+ # =============================================================================
1046
+
1047
+
1048
def validate_pattern_yaml(
    content: str,
    source_path: str = "",
    strict: bool = False,
) -> dict[str, Any]:
    """Validate pattern YAML content.

    The content is parsed, checked against ``PATTERN_SCHEMA`` and then, when
    the document root is a mapping, checked by ``PatternValidator``.

    Args:
        content: YAML content string
        source_path: Source file path for error messages
        strict: Treat warnings as errors

    Returns:
        Parsed and validated data

    Raises:
        YAMLValidationException: If parsing fails, or if any finding of
            ERROR severity (any finding at all when ``strict``) is
            produced. The exception carries every finding, warnings
            included, not only the ones that triggered the failure.
    """
    # Parse YAML
    parse_result = parse_yaml_with_locations(content)
    if not parse_result.success:
        raise YAMLValidationException(parse_result.errors, source_path)

    data = parse_result.data
    line_mapping = parse_result.line_mapping

    # Schema validation
    schema_errors = SchemaValidator(PATTERN_SCHEMA).validate(data, line_mapping)

    # Pattern-specific validation reads the document as a mapping. For an
    # empty document or a top-level list it is skipped, so the failure is
    # reported through the schema errors instead of an AttributeError.
    pattern_errors: list[ValidationError] = []
    if isinstance(data, dict):
        pattern_errors = PatternValidator().validate(data, line_mapping)

    all_errors = parse_result.errors + schema_errors + pattern_errors

    # Decide which findings are fatal for this run.
    if strict:
        blocking = all_errors
    else:
        blocking = [e for e in all_errors if e.severity == ErrorSeverity.ERROR]

    if blocking:
        raise YAMLValidationException(all_errors, source_path)

    return data
1098
+
1099
+
1100
def validate_pattern_file(
    file_path: str | Path,
    strict: bool = False,
) -> dict[str, Any]:
    """Validate a pattern YAML file.

    The file is read as UTF-8 and handed to ``validate_pattern_yaml``.

    Args:
        file_path: Path to YAML file
        strict: Treat warnings as errors

    Returns:
        Parsed and validated data

    Raises:
        YAMLValidationException: If the file does not exist
            (``FILE_NOT_FOUND``), cannot be read (``PERMISSION_DENIED``),
            is not valid UTF-8 (``ENCODING_ERROR``), or fails validation.
            File-system failures are wrapped in this exception rather
            than surfacing as ``FileNotFoundError`` / ``PermissionError``.
    """
    file_path = Path(file_path)
    source = str(file_path)

    def _failure(error: ValidationError) -> YAMLValidationException:
        """Wrap a single error in the exception type callers expect."""
        return YAMLValidationException([error], source)

    def _not_found() -> ValidationError:
        """Build the FILE_NOT_FOUND error for this path."""
        return ValidationError(
            code=ErrorCode.FILE_NOT_FOUND,
            message=f"File not found: {file_path}",
            severity=ErrorSeverity.ERROR,
        )

    if not file_path.exists():
        raise _failure(_not_found())

    try:
        content = file_path.read_text(encoding="utf-8")
    except FileNotFoundError as exc:
        # The file disappeared between the existence check and the read;
        # report it the same way as a file that was never there.
        raise _failure(_not_found()) from exc
    except PermissionError as exc:
        raise _failure(ValidationError(
            code=ErrorCode.PERMISSION_DENIED,
            message=f"Permission denied: {file_path}",
            severity=ErrorSeverity.ERROR,
        )) from exc
    except UnicodeDecodeError as exc:
        raise _failure(ValidationError(
            code=ErrorCode.ENCODING_ERROR,
            message=f"Encoding error: {exc}",
            severity=ErrorSeverity.ERROR,
            suggestion="Ensure the file is saved as UTF-8",
        )) from exc

    return validate_pattern_yaml(content, source, strict)