truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877) hide show
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,1170 @@
1
+ """Custom pattern configuration via YAML files.
2
+
3
+ This module provides a flexible system for defining custom validation patterns
4
+ through YAML configuration files, eliminating the need for code changes.
5
+
6
+ Key features:
7
+ - YAML-based pattern definitions
8
+ - Hierarchical pattern organization
9
+ - Pattern inheritance and composition
10
+ - Hot-reload support for development
11
+ - Pattern validation and testing
12
+
13
+ Example YAML configuration:
14
+ patterns:
15
+ korean_phone:
16
+ name: Korean Phone Number
17
+ regex: "^01[0-9]-[0-9]{3,4}-[0-9]{4}$"
18
+ priority: 90
19
+ data_type: korean_phone
20
+ examples:
21
+ - "010-1234-5678"
22
+ - "011-123-4567"
23
+
24
+ email:
25
+ name: Email Address
26
+ regex: "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$"
27
+ priority: 85
28
+ data_type: email
29
+
30
+ Example usage:
31
+ from truthound.profiler.custom_patterns import (
32
+ PatternConfig,
33
+ load_patterns,
34
+ PatternConfigLoader,
35
+ )
36
+
37
+ # Load patterns from YAML
38
+ patterns = load_patterns("patterns.yaml")
39
+
40
+ # Or use the loader for more control
41
+ loader = PatternConfigLoader()
42
+ loader.load_file("patterns.yaml")
43
+ loader.load_directory("patterns/")
44
+
45
+ # Get all patterns
46
+ all_patterns = loader.get_all_patterns()
47
+ """
48
+
49
+ from __future__ import annotations
50
+
51
+ import os
52
+ import re
53
+ import threading
54
+ import time
55
+ from dataclasses import dataclass, field
56
+ from datetime import datetime
57
+ from enum import Enum
58
+ from pathlib import Path
59
+ from typing import Any, Callable
60
+
61
+ from truthound.profiler.base import DataType
62
+
63
+
64
+ # =============================================================================
65
+ # Pattern Configuration Types
66
+ # =============================================================================
67
+
68
+
69
class PatternPriority(int, Enum):
    """Named priority levels for pattern matching.

    Members are plain integers (the class mixes in ``int``), so they can be
    compared and sorted together with arbitrary numeric priorities.
    A larger value means a higher priority.
    """

    HIGHEST = 100  # top of the scale
    HIGH = 90
    MEDIUM = 50  # used as the default priority for PatternConfig
    LOW = 25
    LOWEST = 10  # bottom of the scale
77
+
78
+
79
@dataclass
class PatternExample:
    """A sample value used to test a pattern.

    Attributes:
        value: The text to run against the pattern.
        should_match: Expected outcome; ``True`` when the value is supposed
            to match the pattern, ``False`` when it is a negative example.
        description: Optional free-form note about the example.
    """

    value: str
    should_match: bool = True
    description: str = ""
86
+
87
+
88
@dataclass
class PatternConfig:
    """Configuration for a single pattern.

    Attributes:
        name: Human-readable pattern name
        pattern_id: Unique identifier for the pattern
        regex: Regular expression pattern
        priority: Matching priority (higher = checked first)
        data_type: Inferred data type when pattern matches
        min_match_ratio: Minimum ratio of values that must match
        description: Pattern description
        examples: Example values for testing
        tags: Tags for categorization
        enabled: Whether pattern is active
        case_sensitive: Whether regex is case-sensitive
        multiline: Whether regex uses multiline mode
        validator_fn: Optional custom validator function name
        metadata: Additional metadata

    Raises:
        ValueError: On construction, if ``regex`` is not a valid regular
            expression.
    """

    name: str
    pattern_id: str
    regex: str
    priority: int = PatternPriority.MEDIUM
    data_type: str = "string"
    min_match_ratio: float = 0.8
    description: str = ""
    examples: list[PatternExample] = field(default_factory=list)
    tags: list[str] = field(default_factory=list)
    enabled: bool = True
    case_sensitive: bool = True
    multiline: bool = False
    validator_fn: str | None = None
    metadata: dict[str, Any] = field(default_factory=dict)

    # Internal cache of the compiled regex; rebuilt in __post_init__.
    _compiled_regex: re.Pattern | None = field(default=None, repr=False)

    def __post_init__(self) -> None:
        """Compile the regex eagerly so invalid patterns fail at creation."""
        self._compile_regex()

    def _compile_regex(self) -> None:
        """Compile ``regex`` with the flags implied by the config.

        Raises:
            ValueError: If the regular expression cannot be compiled.
        """
        flags = 0
        if not self.case_sensitive:
            flags |= re.IGNORECASE
        if self.multiline:
            flags |= re.MULTILINE

        try:
            self._compiled_regex = re.compile(self.regex, flags)
        except re.error as e:
            # Chain the original error so the regex diagnostics stay visible.
            raise ValueError(f"Invalid regex for pattern '{self.name}': {e}") from e

    @property
    def compiled_regex(self) -> re.Pattern:
        """Get compiled regex pattern, compiling it first if needed."""
        if self._compiled_regex is None:
            self._compile_regex()
        return self._compiled_regex  # type: ignore

    def matches(self, value: str) -> bool:
        """Check if value matches the pattern.

        The value is converted with ``str()`` and tested with
        ``re.Pattern.match`` (anchored at the start of the string).
        ``None`` never matches, and any exception raised while matching is
        treated as "no match".
        """
        if value is None:
            return False
        try:
            return bool(self.compiled_regex.match(str(value)))
        except Exception:
            return False

    def get_data_type(self) -> DataType:
        """Get the DataType enum value, falling back to ``DataType.STRING``
        when ``data_type`` is not a valid ``DataType`` value."""
        try:
            return DataType(self.data_type)
        except ValueError:
            return DataType.STRING

    def validate_examples(self) -> list[tuple[str, bool, str]]:
        """Validate all examples against the pattern.

        Returns:
            List of (value, passed, message) tuples
        """
        results = []
        for example in self.examples:
            actual = self.matches(example.value)
            passed = actual == example.should_match

            if passed:
                message = "OK"
            else:
                expected = "match" if example.should_match else "not match"
                got = "matched" if actual else "did not match"
                message = f"Expected {expected}, but {got}"

            results.append((example.value, passed, message))

        return results

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary for serialization.

        The priority is emitted as a plain number even when it is a
        ``PatternPriority`` member (the default), so the result contains only
        built-in types and can be written and re-read by plain YAML/JSON
        serializers.
        """
        priority = self.priority
        if isinstance(priority, Enum):
            priority = priority.value

        return {
            "name": self.name,
            "pattern_id": self.pattern_id,
            "regex": self.regex,
            "priority": priority,
            "data_type": self.data_type,
            "min_match_ratio": self.min_match_ratio,
            "description": self.description,
            "examples": [
                {
                    "value": e.value,
                    "should_match": e.should_match,
                    "description": e.description,
                }
                for e in self.examples
            ],
            "tags": self.tags,
            "enabled": self.enabled,
            "case_sensitive": self.case_sensitive,
            "multiline": self.multiline,
            "validator_fn": self.validator_fn,
            "metadata": self.metadata,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any], pattern_id: str | None = None) -> "PatternConfig":
        """Create from dictionary.

        Args:
            data: Pattern definition; ``regex`` is the only required key.
            pattern_id: Identifier to use; when omitted, ``data["pattern_id"]``
                or ``data["name"]`` is used, else ``"unnamed"``.

        Returns:
            The constructed pattern configuration.

        Raises:
            KeyError: If ``data`` has no ``regex`` key.
            ValueError: If the regex is invalid.
        """
        examples: list[PatternExample] = []
        # A key present with an empty value is parsed as None by YAML loaders,
        # so "or []" is needed in addition to the .get() default.
        for ex in data.get("examples") or []:
            if isinstance(ex, dict):
                examples.append(PatternExample(
                    value=ex.get("value", ""),
                    should_match=ex.get("should_match", True),
                    description=ex.get("description", ""),
                ))
            elif ex is not None:
                # Unquoted scalars such as 12345 arrive as non-str values;
                # keep them as examples instead of silently dropping them.
                examples.append(PatternExample(value=str(ex)))

        tags = data.get("tags") or []
        if isinstance(tags, str):
            tags = [tags]  # tolerate a single scalar tag

        return cls(
            name=data.get("name", pattern_id or "unnamed"),
            pattern_id=pattern_id or data.get("pattern_id", data.get("name", "unnamed")),
            regex=data["regex"],
            priority=data.get("priority", PatternPriority.MEDIUM),
            data_type=data.get("data_type", "string"),
            min_match_ratio=data.get("min_match_ratio", 0.8),
            description=data.get("description", ""),
            examples=examples,
            tags=tags,
            enabled=data.get("enabled", True),
            case_sensitive=data.get("case_sensitive", True),
            multiline=data.get("multiline", False),
            validator_fn=data.get("validator_fn"),
            metadata=data.get("metadata") or {},
        )
245
+
246
+
247
+ # =============================================================================
248
+ # Pattern Group Configuration
249
+ # =============================================================================
250
+
251
+
252
@dataclass
class PatternGroup:
    """Group of related patterns.

    Allows organizing patterns into logical categories.

    Attributes:
        name: Human-readable group name
        group_id: Unique identifier for the group
        description: Group description
        patterns: Patterns that belong to the group
        enabled: Whether the group is active
        priority_boost: Value added to every pattern priority
        tags: Tags for categorization
        metadata: Additional metadata
    """

    name: str
    group_id: str
    description: str = ""
    patterns: list[PatternConfig] = field(default_factory=list)
    enabled: bool = True
    priority_boost: int = 0  # Added to all pattern priorities
    tags: list[str] = field(default_factory=list)
    metadata: dict[str, Any] = field(default_factory=dict)

    def get_patterns(self, include_disabled: bool = False) -> list[PatternConfig]:
        """Get patterns in this group.

        When ``priority_boost`` is non-zero, each returned pattern is a copy
        with the boost added to its priority; the stored patterns are left
        untouched. A disabled group yields no patterns at all.

        Args:
            include_disabled: Whether to include disabled patterns

        Returns:
            List of patterns
        """
        # Local import: the module only imports ``dataclass`` and ``field``.
        from dataclasses import replace

        if not self.enabled:
            return []

        patterns = []
        for p in self.patterns:
            if p.enabled or include_disabled:
                if self.priority_boost != 0:
                    # replace() copies every field as-is. Rebuilding from
                    # to_dict() would turn the examples into plain dicts.
                    p = replace(p, priority=p.priority + self.priority_boost)
                patterns.append(p)

        return patterns

    def to_dict(self) -> dict[str, Any]:
        """Convert to dictionary."""
        return {
            "name": self.name,
            "group_id": self.group_id,
            "description": self.description,
            "patterns": [p.to_dict() for p in self.patterns],
            "enabled": self.enabled,
            "priority_boost": self.priority_boost,
            "tags": self.tags,
            "metadata": self.metadata,
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any], group_id: str | None = None) -> "PatternGroup":
        """Create from dictionary.

        Args:
            data: Group definition; ``patterns`` maps pattern IDs to pattern
                definitions.
            group_id: Identifier to use; when omitted, ``data["group_id"]``
                or ``data["name"]`` is used, else ``"unnamed"``.

        Returns:
            The constructed group.
        """
        # "or {}" covers keys that are present but empty (parsed as None).
        patterns = [
            PatternConfig.from_dict(pattern_data, pattern_id)
            for pattern_id, pattern_data in (data.get("patterns") or {}).items()
        ]

        return cls(
            name=data.get("name", group_id or "unnamed"),
            group_id=group_id or data.get("group_id", data.get("name", "unnamed")),
            description=data.get("description", ""),
            patterns=patterns,
            enabled=data.get("enabled", True),
            priority_boost=data.get("priority_boost", 0),
            tags=data.get("tags") or [],
            metadata=data.get("metadata") or {},
        )
325
+
326
+
327
+ # =============================================================================
328
+ # YAML Configuration Schema
329
+ # =============================================================================
330
+
331
+
332
@dataclass
class PatternConfigSchema:
    """In-memory form of one complete pattern configuration file.

    Holds standalone patterns keyed by pattern ID, pattern groups keyed by
    group ID, and descriptive fields about the configuration itself.
    """

    version: str = "1.0"
    name: str = ""
    description: str = ""
    patterns: dict[str, PatternConfig] = field(default_factory=dict)
    groups: dict[str, PatternGroup] = field(default_factory=dict)
    extends: list[str] = field(default_factory=list)  # Parent configs to inherit from
    metadata: dict[str, Any] = field(default_factory=dict)
    loaded_at: datetime = field(default_factory=datetime.now)
    source_path: str = ""

    def get_all_patterns(self, include_disabled: bool = False) -> list[PatternConfig]:
        """Collect standalone and grouped patterns into one list.

        Args:
            include_disabled: When true, disabled standalone patterns are
                kept; the flag is also forwarded to each group.

        Returns:
            The patterns ordered by descending priority.
        """
        # Standalone patterns first, then whatever each group contributes.
        collected = [
            candidate
            for candidate in self.patterns.values()
            if include_disabled or candidate.enabled
        ]
        for grp in self.groups.values():
            collected += grp.get_patterns(include_disabled)

        # Highest priority first; the sort is stable for equal priorities.
        collected.sort(key=lambda candidate: candidate.priority, reverse=True)
        return collected

    def get_pattern(self, pattern_id: str) -> PatternConfig | None:
        """Look up a pattern by ID.

        Standalone patterns are consulted first, then the patterns of each
        group. Returns ``None`` when nothing matches.
        """
        try:
            return self.patterns[pattern_id]
        except KeyError:
            pass

        return next(
            (
                candidate
                for grp in self.groups.values()
                for candidate in grp.patterns
                if candidate.pattern_id == pattern_id
            ),
            None,
        )

    def to_dict(self) -> dict[str, Any]:
        """Build a plain-dictionary view of the configuration.

        ``loaded_at`` and ``source_path`` are not part of the output.
        """
        serialized_patterns = {}
        for pid, pattern in self.patterns.items():
            serialized_patterns[pid] = pattern.to_dict()

        serialized_groups = {}
        for gid, grp in self.groups.items():
            serialized_groups[gid] = grp.to_dict()

        return {
            "version": self.version,
            "name": self.name,
            "description": self.description,
            "patterns": serialized_patterns,
            "groups": serialized_groups,
            "extends": self.extends,
            "metadata": self.metadata,
        }
395
+
396
+
397
+ # =============================================================================
398
+ # YAML Parser
399
+ # =============================================================================
400
+
401
+
402
def _parse_yaml(content: str) -> dict[str, Any]:
    """Turn YAML text into a dictionary.

    PyYAML's ``safe_load`` is used when the package can be imported; an empty
    (falsy) document is returned as ``{}``. Without PyYAML the text is handed
    to the module's basic fallback parser.
    """
    try:
        import yaml

        document = yaml.safe_load(content)
    except ImportError:
        # PyYAML is optional: fall back to the built-in basic parser.
        return _basic_yaml_parse(content)

    return document if document else {}
413
+
414
+
415
+ def _basic_yaml_parse(content: str) -> dict[str, Any]:
416
+ """Basic YAML parser for simple structures.
417
+
418
+ This is a fallback when PyYAML is not installed.
419
+ Only supports simple key-value pairs and lists.
420
+ """
421
+ import json
422
+
423
+ # Try JSON first (YAML is a superset)
424
+ try:
425
+ return json.loads(content)
426
+ except json.JSONDecodeError:
427
+ pass
428
+
429
+ # Very basic YAML parsing
430
+ result: dict[str, Any] = {}
431
+ current_key: str | None = None
432
+ current_indent = 0
433
+
434
+ for line in content.split("\n"):
435
+ stripped = line.strip()
436
+ if not stripped or stripped.startswith("#"):
437
+ continue
438
+
439
+ indent = len(line) - len(line.lstrip())
440
+
441
+ if ":" in stripped:
442
+ key, _, value = stripped.partition(":")
443
+ key = key.strip()
444
+ value = value.strip()
445
+
446
+ if value:
447
+ # Remove quotes
448
+ if value.startswith('"') and value.endswith('"'):
449
+ value = value[1:-1]
450
+ elif value.startswith("'") and value.endswith("'"):
451
+ value = value[1:-1]
452
+
453
+ result[key] = value
454
+ else:
455
+ result[key] = {}
456
+ current_key = key
457
+ current_indent = indent
458
+
459
+ return result
460
+
461
+
462
+ def _dump_yaml(data: dict[str, Any]) -> str:
463
+ """Dump dictionary to YAML string."""
464
+ try:
465
+ import yaml
466
+ return yaml.dump(data, default_flow_style=False, allow_unicode=True)
467
+ except ImportError:
468
+ import json
469
+ return json.dumps(data, indent=2, ensure_ascii=False)
470
+
471
+
472
+ # =============================================================================
473
+ # Configuration Loader
474
+ # =============================================================================
475
+
476
+
477
class PatternConfigLoader:
    """Loads pattern configurations from YAML files.

    Supports:
    - Single file loading
    - Directory scanning
    - Recording each configuration's ``extends`` list on its schema
    - Hot-reload for development

    Loaded schemas are kept in ``_configs`` keyed by their source (file path
    string or inline name). ``_lock`` guards ``_configs`` and
    ``_file_mtimes``; it is a plain ``threading.Lock`` (non-reentrant), so a
    method must never call another lock-taking method while holding it.

    Example:
        loader = PatternConfigLoader()
        loader.load_file("patterns.yaml")
        loader.load_directory("patterns/")

        # Get all patterns
        patterns = loader.get_all_patterns()

        # Enable hot-reload
        loader.enable_hot_reload(interval=5.0)
    """

    def __init__(
        self,
        auto_validate: bool = True,
        strict_mode: bool = False,
    ):
        """Initialize loader.

        Args:
            auto_validate: Validate patterns on load
            strict_mode: Fail on any validation error
        """
        self.auto_validate = auto_validate
        self.strict_mode = strict_mode

        # Loaded schemas keyed by source (file path string or inline name).
        self._configs: dict[str, PatternConfigSchema] = {}
        # Modification time recorded for each loaded file (hot-reload input).
        self._file_mtimes: dict[str, float] = {}
        self._hot_reload_enabled = False
        self._hot_reload_thread: threading.Thread | None = None
        # Guards _configs and _file_mtimes. Not reentrant.
        self._lock = threading.Lock()
        self._stop_hot_reload = threading.Event()

    @staticmethod
    def _log_warning(message: str, *args: object) -> None:
        """Emit a warning through the module logger (lazy %-formatting)."""
        import logging

        logging.getLogger(__name__).warning(message, *args)

    def load_file(self, path: str | Path) -> PatternConfigSchema:
        """Load patterns from a YAML file.

        Args:
            path: Path to YAML file

        Returns:
            Loaded configuration

        Raises:
            FileNotFoundError: If *path* does not exist.
            ValueError: If validation reports errors while ``strict_mode``
                is enabled.
        """
        path = Path(path)
        if not path.exists():
            raise FileNotFoundError(f"Pattern config not found: {path}")

        # Take the mtime BEFORE reading. If the file is rewritten while we
        # read it, the stored timestamp is then older than the file's and the
        # next hot-reload check picks the change up; stat-ing afterwards
        # would record the new timestamp against stale content.
        mtime = path.stat().st_mtime

        with open(path, "r", encoding="utf-8") as f:
            content = f.read()

        data = _parse_yaml(content)
        source = str(path)
        config = self._parse_config(data, source)

        if self.auto_validate:
            errors = self.validate_config(config)
            if errors and self.strict_mode:
                raise ValueError(f"Pattern validation errors: {errors}")

        with self._lock:
            self._configs[source] = config
            self._file_mtimes[source] = mtime

        return config

    def _try_load(self, file_path: Path) -> PatternConfigSchema | None:
        """Load one file for a directory scan.

        Returns the loaded schema, or ``None`` when loading failed and
        ``strict_mode`` is off (the failure is logged). In strict mode the
        original exception propagates.
        """
        try:
            return self.load_file(file_path)
        except Exception as exc:
            if self.strict_mode:
                raise
            self._log_warning("Skipping pattern file %s: %s", file_path, exc)
            return None

    def load_directory(
        self,
        directory: str | Path,
        pattern: str = "*.yaml",
        recursive: bool = True,
    ) -> list[PatternConfigSchema]:
        """Load all pattern files from a directory.

        When *pattern* ends in ``.yaml`` the same pattern with a ``.yml``
        suffix is scanned as well; ``.yml`` files whose path is already a key
        in the loaded configurations are skipped.

        Args:
            directory: Directory to scan
            pattern: Glob pattern for files
            recursive: Whether to search recursively

        Returns:
            List of loaded configurations (empty if *directory* is missing)

        Raises:
            Exception: Whatever ``load_file`` raised, in ``strict_mode`` only.
        """
        directory = Path(directory)
        if not directory.exists():
            return []

        configs: list[PatternConfigSchema] = []
        glob_method = directory.rglob if recursive else directory.glob

        for file_path in glob_method(pattern):
            if file_path.is_file():
                config = self._try_load(file_path)
                if config is not None:
                    configs.append(config)

        # Also try .yml extension. Only the trailing suffix is swapped so a
        # ".yaml" appearing earlier in the glob pattern is left untouched.
        if pattern.endswith(".yaml"):
            yml_pattern = pattern[: -len(".yaml")] + ".yml"
            for file_path in glob_method(yml_pattern):
                if not file_path.is_file():
                    continue
                with self._lock:
                    already_loaded = str(file_path) in self._configs
                if already_loaded:
                    continue
                config = self._try_load(file_path)
                if config is not None:
                    configs.append(config)

        return configs

    def load_from_string(self, content: str, name: str = "inline") -> PatternConfigSchema:
        """Load patterns from a YAML string.

        The result is stored under *name*; no modification time is recorded,
        so hot-reload never touches it.

        Args:
            content: YAML content
            name: Name for the configuration

        Returns:
            Loaded configuration
        """
        data = _parse_yaml(content)
        config = self._parse_config(data, name)

        with self._lock:
            self._configs[name] = config

        return config

    def _parse_config(self, data: dict[str, Any], source: str) -> PatternConfigSchema:
        """Parse configuration data into schema object.

        Entries under ``patterns`` and ``groups`` whose value is not a
        mapping are ignored.
        """
        # Parse standalone patterns
        patterns = {
            pattern_id: PatternConfig.from_dict(pattern_data, pattern_id)
            for pattern_id, pattern_data in data.get("patterns", {}).items()
            if isinstance(pattern_data, dict)
        }

        # Parse groups
        groups = {
            group_id: PatternGroup.from_dict(group_data, group_id)
            for group_id, group_data in data.get("groups", {}).items()
            if isinstance(group_data, dict)
        }

        return PatternConfigSchema(
            version=data.get("version", "1.0"),
            name=data.get("name", ""),
            description=data.get("description", ""),
            patterns=patterns,
            groups=groups,
            extends=data.get("extends", []),
            metadata=data.get("metadata", {}),
            source_path=source,
        )

    def get_all_patterns(self, include_disabled: bool = False) -> list[PatternConfig]:
        """Get all loaded patterns.

        Args:
            include_disabled: Whether to include disabled patterns

        Returns:
            List of all patterns, sorted by priority (highest first). When
            several configurations define the same ``pattern_id`` only the
            one with the highest priority is kept; on a tie the first one
            encountered wins.
        """
        all_patterns = []

        with self._lock:
            for config in self._configs.values():
                all_patterns.extend(config.get_all_patterns(include_disabled))

        # Remove duplicates by pattern_id (keep highest priority)
        seen: dict[str, PatternConfig] = {}
        for p in all_patterns:
            if p.pattern_id not in seen or p.priority > seen[p.pattern_id].priority:
                seen[p.pattern_id] = p

        return sorted(seen.values(), key=lambda p: p.priority, reverse=True)

    def get_pattern(self, pattern_id: str) -> PatternConfig | None:
        """Get a specific pattern by ID.

        Configurations are searched in the order they were stored and the
        first hit is returned.

        Args:
            pattern_id: Pattern identifier

        Returns:
            Pattern config or None
        """
        with self._lock:
            for config in self._configs.values():
                pattern = config.get_pattern(pattern_id)
                if pattern:
                    return pattern
        return None

    def get_patterns_by_tag(self, tag: str) -> list[PatternConfig]:
        """Get patterns with a specific tag.

        Args:
            tag: Tag to filter by

        Returns:
            List of matching (enabled) patterns
        """
        return [p for p in self.get_all_patterns() if tag in p.tags]

    def get_patterns_by_type(self, data_type: str | DataType) -> list[PatternConfig]:
        """Get patterns for a specific data type.

        Args:
            data_type: Data type to filter by (enum member or its value)

        Returns:
            List of matching (enabled) patterns
        """
        if isinstance(data_type, DataType):
            data_type = data_type.value
        return [p for p in self.get_all_patterns() if p.data_type == data_type]

    def validate_config(self, config: PatternConfigSchema) -> list[str]:
        """Validate a configuration.

        For every pattern (disabled ones included) this checks that the
        regex compiles, that each declared example behaves as declared, and
        that ``data_type`` is a valid ``DataType`` value.

        Args:
            config: Configuration to validate

        Returns:
            List of error messages (empty when everything passed)
        """
        errors = []

        for pattern in config.get_all_patterns(include_disabled=True):
            # Validate regex
            try:
                re.compile(pattern.regex)
            except re.error as e:
                errors.append(f"Pattern '{pattern.pattern_id}': Invalid regex: {e}")

            # Validate examples
            example_results = pattern.validate_examples()
            for value, passed, message in example_results:
                if not passed:
                    errors.append(
                        f"Pattern '{pattern.pattern_id}': Example '{value}' failed: {message}"
                    )

            # Validate data type
            try:
                DataType(pattern.data_type)
            except ValueError:
                errors.append(
                    f"Pattern '{pattern.pattern_id}': Unknown data_type '{pattern.data_type}'"
                )

        return errors

    def enable_hot_reload(self, interval: float = 5.0) -> None:
        """Enable hot-reload of configuration files.

        Starts a daemon thread that checks the loaded files for changes
        every *interval* seconds. Does nothing if already enabled.

        Args:
            interval: Check interval in seconds
        """
        if self._hot_reload_enabled:
            return

        self._hot_reload_enabled = True
        self._stop_hot_reload.clear()

        def watch_loop() -> None:
            # Event.wait returns False on timeout (run a check) and True once
            # the stop event is set (leave the loop).
            while not self._stop_hot_reload.wait(interval):
                self._check_for_changes()

        self._hot_reload_thread = threading.Thread(target=watch_loop, daemon=True)
        self._hot_reload_thread.start()

    def disable_hot_reload(self) -> None:
        """Disable hot-reload.

        Signals the watcher thread to stop and waits up to two seconds for
        it to finish. Does nothing if hot-reload is not enabled.
        """
        if not self._hot_reload_enabled:
            return

        self._stop_hot_reload.set()
        if self._hot_reload_thread:
            self._hot_reload_thread.join(timeout=2.0)
        self._hot_reload_enabled = False

    def _check_for_changes(self) -> None:
        """Check for file changes and reload if necessary.

        Files that can no longer be stat-ed are dropped from the loader.
        Files whose modification time increased are reloaded; a failed
        reload leaves the previously loaded configuration in place.
        """
        paths_to_reload: list[str] = []

        with self._lock:
            for path, mtime in list(self._file_mtimes.items()):
                try:
                    current_mtime = Path(path).stat().st_mtime
                except OSError:
                    # File deleted or inaccessible. pop() rather than del so
                    # a missing _configs entry cannot raise KeyError.
                    self._configs.pop(path, None)
                    self._file_mtimes.pop(path, None)
                    continue
                if current_mtime > mtime:
                    paths_to_reload.append(path)

        # Reload only after the lock is released: load_file acquires the
        # same non-reentrant lock and would block forever if called while
        # this thread still held it.
        for path in paths_to_reload:
            try:
                self.load_file(path)
            except Exception:
                # Deliberate best-effort: keep old config on reload failure.
                continue

    def clear(self) -> None:
        """Clear all loaded configurations."""
        with self._lock:
            self._configs.clear()
            self._file_mtimes.clear()
793
+
794
+
795
+ # =============================================================================
796
+ # Pattern Registry
797
+ # =============================================================================
798
+
799
+
800
class PatternRegistry:
    """Process-wide registry of custom patterns.

    Every ``PatternRegistry()`` call yields the same object. The registry
    combines patterns read from files (through an internal
    ``PatternConfigLoader``) with patterns registered in code.

    Example:
        # Register patterns
        registry = PatternRegistry()
        registry.load_file("patterns.yaml")

        # Use patterns
        for pattern in registry.get_patterns():
            if pattern.matches(value):
                print(f"Matched: {pattern.name}")
    """

    _instance: "PatternRegistry | None" = None
    _lock = threading.Lock()

    def __new__(cls) -> "PatternRegistry":
        # Only take the class lock when no instance exists yet, then
        # re-check under the lock before creating one.
        if cls._instance is not None:
            return cls._instance
        with cls._lock:
            if cls._instance is None:
                created = super().__new__(cls)
                created._initialized = False
                cls._instance = created
        return cls._instance

    def __init__(self) -> None:
        # __init__ runs on every PatternRegistry() call; set up state once.
        already_set_up = getattr(self, "_initialized", False)
        if not already_set_up:
            self._loader = PatternConfigLoader()
            self._custom_patterns: dict[str, PatternConfig] = {}
            self._initialized = True

    @property
    def loader(self) -> PatternConfigLoader:
        """The loader that holds file- and string-based configurations."""
        return self._loader

    def load_file(self, path: str | Path) -> PatternConfigSchema:
        """Load patterns from *path* via the internal loader."""
        loader = self._loader
        return loader.load_file(path)

    def load_directory(self, directory: str | Path) -> list[PatternConfigSchema]:
        """Load patterns from *directory* via the internal loader."""
        loader = self._loader
        return loader.load_directory(directory)

    def register(self, pattern: PatternConfig) -> None:
        """Add a pattern in code, replacing any earlier one with the same ID.

        Args:
            pattern: Pattern to register
        """
        self._custom_patterns[pattern.pattern_id] = pattern

    def unregister(self, pattern_id: str) -> bool:
        """Remove a pattern that was added with ``register``.

        Args:
            pattern_id: Pattern to unregister

        Returns:
            True if pattern was removed, False if it was not registered
        """
        try:
            del self._custom_patterns[pattern_id]
        except KeyError:
            return False
        return True

    def get_patterns(self, include_disabled: bool = False) -> list[PatternConfig]:
        """Collect loader patterns and programmatic patterns together.

        Args:
            include_disabled: Whether to include disabled patterns

        Returns:
            Patterns sorted by priority, highest first; for a duplicated
            ``pattern_id`` the entry with the strictly highest priority wins
            and ties go to the one seen first (loader patterns come first).
        """
        candidates = self._loader.get_all_patterns(include_disabled)

        # Programmatic patterns follow the loader's.
        candidates.extend(
            custom
            for custom in self._custom_patterns.values()
            if custom.enabled or include_disabled
        )

        # One winner per pattern_id.
        winners: dict[str, PatternConfig] = {}
        for candidate in candidates:
            key = candidate.pattern_id
            if key in winners and not candidate.priority > winners[key].priority:
                continue
            winners[key] = candidate

        ordered = list(winners.values())
        ordered.sort(key=lambda item: item.priority, reverse=True)
        return ordered

    def get_pattern(self, pattern_id: str) -> PatternConfig | None:
        """Look up a pattern by ID, preferring programmatic registrations."""
        if pattern_id not in self._custom_patterns:
            return self._loader.get_pattern(pattern_id)
        return self._custom_patterns[pattern_id]

    def match(self, value: str) -> list[PatternConfig]:
        """Find all patterns that match a value.

        Args:
            value: Value to match

        Returns:
            List of matching patterns (highest priority first)
        """
        return [candidate for candidate in self.get_patterns() if candidate.matches(value)]

    def match_first(self, value: str) -> PatternConfig | None:
        """Find the first (highest priority) matching pattern.

        Args:
            value: Value to match

        Returns:
            First matching pattern or None
        """
        hits = (candidate for candidate in self.get_patterns() if candidate.matches(value))
        return next(hits, None)

    def clear(self) -> None:
        """Drop every loaded configuration and programmatic pattern."""
        self._loader.clear()
        self._custom_patterns.clear()
933
+
934
+
935
# Global registry instance. PatternRegistry.__new__ always hands back the one
# shared object, so this name and any later PatternRegistry() call refer to
# the same registry. Created at import time.
pattern_registry = PatternRegistry()
937
+
938
+
939
+ # =============================================================================
940
+ # Default Patterns
941
+ # =============================================================================
942
+
943
+
944
# Built-in pattern definitions, loaded by load_default_patterns() under the
# source name "defaults". This is a raw string, so each "\\" below reaches the
# YAML parser as two backslashes; inside a double-quoted YAML scalar that is
# the escape for a single backslash, giving the regex engine e.g. "\." / "\d".
DEFAULT_PATTERNS_YAML = r"""
version: "1.0"
name: "Default Patterns"
description: "Built-in patterns for common data types"

patterns:
  email:
    name: Email Address
    regex: "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$"
    priority: 85
    data_type: email
    description: Standard email address format
    examples:
      - value: "user@example.com"
        should_match: true
      - value: "not-an-email"
        should_match: false

  url:
    name: URL
    regex: "^https?://[\\w.-]+(?:/[\\w./?%&=-]*)?$"
    priority: 80
    data_type: url
    description: HTTP/HTTPS URL
    examples:
      - value: "https://example.com/path"
        should_match: true

  uuid:
    name: UUID
    regex: "^[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}$"
    priority: 90
    data_type: uuid
    description: UUID v4 format
    examples:
      - value: "550e8400-e29b-41d4-a716-446655440000"
        should_match: true

  ip_v4:
    name: IPv4 Address
    regex: "^(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)$"
    priority: 85
    data_type: ip_address
    description: IPv4 address
    examples:
      - value: "192.168.1.1"
        should_match: true
      - value: "256.1.1.1"
        should_match: false

  iso_date:
    name: ISO Date
    regex: "^\\d{4}-\\d{2}-\\d{2}$"
    priority: 75
    data_type: date
    description: ISO 8601 date format
    examples:
      - value: "2024-12-25"
        should_match: true

  iso_datetime:
    name: ISO DateTime
    regex: "^\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}:\\d{2}(?:\\.\\d+)?(?:Z|[+-]\\d{2}:\\d{2})?$"
    priority: 80
    data_type: datetime
    description: ISO 8601 datetime format
    examples:
      - value: "2024-12-25T10:30:00Z"
        should_match: true

groups:
  korean:
    name: Korean Patterns
    description: Patterns for Korean data formats
    priority_boost: 5
    patterns:
      korean_phone:
        name: Korean Phone Number
        regex: "^01[016789]-?\\d{3,4}-?\\d{4}$"
        priority: 90
        data_type: korean_phone
        examples:
          - value: "010-1234-5678"
            should_match: true
          - value: "01012345678"
            should_match: true

      korean_rrn:
        name: Korean RRN
        regex: "^\\d{6}-?[1-4]\\d{6}$"
        priority: 95
        data_type: korean_rrn
        description: Korean Resident Registration Number
        examples:
          - value: "900101-1234567"
            should_match: true

      korean_business_number:
        name: Korean Business Number
        regex: "^\\d{3}-\\d{2}-\\d{5}$"
        priority: 90
        data_type: korean_business_number
        examples:
          - value: "123-45-67890"
            should_match: true
"""
1050
+
1051
+
1052
def load_default_patterns() -> None:
    """Register the built-in pattern set with the global registry.

    ``DEFAULT_PATTERNS_YAML`` is parsed by the registry's loader and stored
    under the source name ``"defaults"``.
    """
    default_loader = pattern_registry.loader
    default_loader.load_from_string(DEFAULT_PATTERNS_YAML, "defaults")
1055
+
1056
+
1057
+ # =============================================================================
1058
+ # Convenience Functions
1059
+ # =============================================================================
1060
+
1061
+
1062
def load_patterns(path: str | Path) -> list[PatternConfig]:
    """Load one YAML file into the global registry and return its patterns.

    Args:
        path: Path to YAML file

    Returns:
        The patterns defined by that file (as reported by the loaded
        configuration's ``get_all_patterns()``)
    """
    return pattern_registry.load_file(path).get_all_patterns()
1073
+
1074
+
1075
def load_patterns_directory(directory: str | Path) -> list[PatternConfig]:
    """Load every pattern file in a directory into the global registry.

    Args:
        directory: Directory containing YAML files

    Returns:
        All patterns the registry holds after loading, not only those
        found in *directory*
    """
    registry = pattern_registry
    registry.load_directory(directory)
    return registry.get_patterns()
1086
+
1087
+
1088
def register_pattern(
    pattern_id: str,
    regex: str,
    name: str | None = None,
    data_type: str = "string",
    priority: int = PatternPriority.MEDIUM,
    **kwargs: Any,
) -> PatternConfig:
    """Build a pattern from its parts and add it to the global registry.

    Args:
        pattern_id: Unique pattern identifier
        regex: Regular expression
        name: Human-readable name; falls back to *pattern_id* when empty
            or omitted
        data_type: Inferred data type
        priority: Match priority
        **kwargs: Additional options forwarded to ``PatternConfig``

    Returns:
        The newly created and registered pattern config
    """
    display_name = name or pattern_id
    created = PatternConfig(
        pattern_id=pattern_id,
        name=display_name,
        regex=regex,
        data_type=data_type,
        priority=priority,
        **kwargs,
    )
    pattern_registry.register(created)
    return created
1119
+
1120
+
1121
def match_patterns(value: str) -> list[PatternConfig]:
    """Return every pattern in the global registry that matches *value*.

    Args:
        value: Value to match

    Returns:
        Matching patterns, in the order the registry reports them
    """
    matched = pattern_registry.match(value)
    return matched
1131
+
1132
+
1133
def infer_type_from_patterns(value: str) -> DataType | None:
    """Infer a data type from the first registry pattern matching *value*.

    Args:
        value: Value to analyze

    Returns:
        The matching pattern's ``get_data_type()`` result, or None when no
        pattern matches
    """
    best = pattern_registry.match_first(value)
    return best.get_data_type() if best else None
1146
+
1147
+
1148
def export_patterns(
    path: str | Path,
    patterns: list[PatternConfig] | None = None,
) -> None:
    """Write patterns to a file as a single configuration document.

    Args:
        path: Output file path
        patterns: Patterns to export (defaults to all registered)
    """
    selected = pattern_registry.get_patterns() if patterns is None else patterns

    exported = {}
    for entry in selected:
        exported[entry.pattern_id] = entry.to_dict()

    document = {
        "version": "1.0",
        "name": "Exported Patterns",
        "patterns": exported,
    }

    # Serialise before touching the file so a failure leaves it untouched.
    text = _dump_yaml(document)
    Path(path).write_text(text, encoding="utf-8")