truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877) hide show
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,746 @@
1
+ """Base classes and data structures for data lineage.
2
+
3
+ This module provides the core abstractions for lineage tracking:
4
+ - LineageNode: Represents a data asset in the lineage graph
5
+ - LineageEdge: Represents a relationship between nodes
6
+ - LineageGraph: The complete lineage DAG
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from abc import ABC, abstractmethod
12
+ from dataclasses import dataclass, field
13
+ from datetime import datetime
14
+ from enum import Enum
15
+ from typing import (
16
+ TYPE_CHECKING,
17
+ Any,
18
+ Callable,
19
+ Dict,
20
+ Iterator,
21
+ List,
22
+ Optional,
23
+ Set,
24
+ Tuple,
25
+ )
26
+ import threading
27
+ import json
28
+ from pathlib import Path
29
+ import uuid
30
+
31
+
32
+ # =============================================================================
33
+ # Enums
34
+ # =============================================================================
35
+
36
+
37
class NodeType(str, Enum):
    """Kinds of data assets that can appear as nodes in a lineage graph.

    Members subclass ``str``, so each member compares equal to its
    string value.
    """

    # Raw data source
    SOURCE = "source"
    # Database table
    TABLE = "table"
    # File-based data
    FILE = "file"
    # Streaming source
    STREAM = "stream"
    # Data transformation
    TRANSFORMATION = "transformation"
    # Validation checkpoint
    VALIDATION = "validation"
    # ML model
    MODEL = "model"
    # Output report
    REPORT = "report"
    # External system
    EXTERNAL = "external"
    # Virtual/computed dataset
    VIRTUAL = "virtual"
50
+
51
+
52
class EdgeType(str, Enum):
    """Kinds of relationships an edge in the lineage graph can express.

    Members subclass ``str``, so each member compares equal to its
    string value.
    """

    # Data derivation
    DERIVED_FROM = "derived_from"
    # Validation relationship
    VALIDATED_BY = "validated_by"
    # Usage relationship
    USED_BY = "used_by"
    # Transformation
    TRANSFORMED_TO = "transformed_to"
    # Join operation
    JOINED_WITH = "joined_with"
    # Aggregation
    AGGREGATED_TO = "aggregated_to"
    # Filter operation
    FILTERED_TO = "filtered_to"
    # Generic dependency
    DEPENDS_ON = "depends_on"
63
+
64
+
65
class OperationType(str, Enum):
    """Kinds of data operations that can be recorded on a lineage edge.

    Members subclass ``str``, so each member compares equal to its
    string value.
    """

    READ = "read"
    WRITE = "write"

    TRANSFORM = "transform"
    FILTER = "filter"
    JOIN = "join"
    AGGREGATE = "aggregate"

    VALIDATE = "validate"
    PROFILE = "profile"

    MASK = "mask"
    EXPORT = "export"
78
+
79
+
80
+ # =============================================================================
81
+ # Exceptions
82
+ # =============================================================================
83
+
84
+
85
class LineageError(Exception):
    """Root of the lineage exception hierarchy.

    Catch this type to handle any lineage-related failure.
    """
89
+
90
+
91
class NodeNotFoundError(LineageError):
    """Signals that a node ID is not present in the graph.

    Attributes:
        node_id: The ID that could not be resolved.
    """

    def __init__(self, node_id: str):
        message = f"Node not found: {node_id}"
        super().__init__(message)
        self.node_id = node_id
97
+
98
+
99
class CyclicDependencyError(LineageError):
    """Signals that a cyclic dependency was detected.

    Attributes:
        cycle: Node IDs forming the cycle, in the order given by the caller.
    """

    def __init__(self, cycle: list[str]):
        rendered_path = " -> ".join(cycle)
        super().__init__(f"Cyclic dependency detected: {rendered_path}")
        self.cycle = cycle
105
+
106
+
107
+ # =============================================================================
108
+ # Configuration
109
+ # =============================================================================
110
+
111
+
112
@dataclass
class LineageConfig:
    """Settings that control how lineage is tracked.

    Attributes:
        track_column_level: Track column-level lineage
        track_row_level: Track row-level lineage (expensive)
        store_samples: Store sample values at each node
        max_history: Maximum history entries per node
        auto_track: Automatically track operations
        persist_path: Path to persist lineage data
        metadata: Free-form extra settings; a fresh dict per instance
    """

    # Tracking granularity
    track_column_level: bool = True
    track_row_level: bool = False

    # Capture and retention
    store_samples: bool = False
    max_history: int = 100
    auto_track: bool = True

    # Persistence and extras
    persist_path: str | Path | None = None
    metadata: dict[str, Any] = field(default_factory=dict)
132
+
133
+
134
+ # =============================================================================
135
+ # Core Data Structures
136
+ # =============================================================================
137
+
138
+
139
@dataclass
class LineageMetadata:
    """Metadata for a lineage node or edge.

    Stores additional context about the data or operation.

    Attributes:
        description: Free-text description.
        owner: Owner of the asset or operation.
        tags: Tags, stored as a tuple.
        properties: Arbitrary key/value properties.
        created_at: Creation timestamp (defaults to ``datetime.now()``).
        updated_at: Last-update timestamp (defaults to ``datetime.now()``).
    """

    description: str = ""
    owner: str = ""
    tags: tuple[str, ...] = field(default_factory=tuple)
    properties: dict[str, Any] = field(default_factory=dict)
    created_at: datetime = field(default_factory=datetime.now)
    updated_at: datetime = field(default_factory=datetime.now)

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict with ISO-8601 timestamp strings.

        ``properties`` is shallow-copied so that mutating the returned dict
        cannot silently change this object.
        """
        return {
            "description": self.description,
            "owner": self.owner,
            "tags": list(self.tags),
            "properties": dict(self.properties),
            "created_at": self.created_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "LineageMetadata":
        """Build an instance from a dict produced by :meth:`to_dict`.

        Missing or empty keys fall back to the field defaults; missing
        timestamps become ``datetime.now()``. ``properties`` is copied so the
        new instance does not share state with ``data``.
        """

        def _timestamp(key: str) -> datetime:
            raw = data.get(key)
            return datetime.fromisoformat(raw) if raw else datetime.now()

        return cls(
            description=data.get("description", ""),
            owner=data.get("owner", ""),
            # ``or`` also covers an explicit ``None`` (JSON ``null``).
            tags=tuple(data.get("tags") or ()),
            properties=dict(data.get("properties") or {}),
            created_at=_timestamp("created_at"),
            updated_at=_timestamp("updated_at"),
        )
173
+
174
+
175
@dataclass
class ColumnLineage:
    """Lineage of a single column.

    Records which upstream columns a column is derived from.

    Attributes:
        column: Name of the column being described.
        source_columns: ``(node_id, column_name)`` pairs it derives from.
        transformation: Description of the transformation.
        dtype: Data type of the column, as a string.
    """

    column: str
    # (node_id, column_name)
    source_columns: tuple[tuple[str, str], ...] = field(default_factory=tuple)
    # Description of transformation
    transformation: str = ""
    dtype: str = ""

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict; each source pair becomes a small dict."""
        sources = []
        for node_id, column_name in self.source_columns:
            sources.append({"node": node_id, "column": column_name})

        payload: dict[str, Any] = {"column": self.column}
        payload["source_columns"] = sources
        payload["transformation"] = self.transformation
        payload["dtype"] = self.dtype
        return payload
197
+
198
+
199
@dataclass
class LineageNode:
    """A node in the lineage graph.

    Represents a data asset (table, file, transformation result, etc.)
    in the data lineage. Equality and hashing are based on ``id`` alone.

    Attributes:
        id: Unique identifier
        name: Human-readable name
        node_type: Type of node
        location: Physical location (path, URI, etc.)
        schema: Column schema if applicable
        metadata: Additional metadata
        column_lineage: Column-level lineage information
        created_at: Creation timestamp (defaults to ``datetime.now()``)
        updated_at: Last-update timestamp (defaults to ``datetime.now()``)
    """

    id: str
    name: str
    node_type: NodeType
    location: str = ""
    schema: dict[str, str] = field(default_factory=dict)  # column -> dtype
    metadata: LineageMetadata = field(default_factory=LineageMetadata)
    column_lineage: tuple[ColumnLineage, ...] = field(default_factory=tuple)
    created_at: datetime = field(default_factory=datetime.now)
    updated_at: datetime = field(default_factory=datetime.now)

    def __hash__(self) -> int:
        # Hash on the ID only, consistent with __eq__ below.
        return hash(self.id)

    def __eq__(self, other: object) -> bool:
        if not isinstance(other, LineageNode):
            return False
        return self.id == other.id

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict with ISO-8601 timestamp strings.

        ``schema`` is shallow-copied so that mutating the returned dict
        cannot silently change this node.
        """
        return {
            "id": self.id,
            "name": self.name,
            "node_type": self.node_type.value,
            "location": self.location,
            "schema": dict(self.schema),
            "metadata": self.metadata.to_dict(),
            "column_lineage": [cl.to_dict() for cl in self.column_lineage],
            "created_at": self.created_at.isoformat(),
            "updated_at": self.updated_at.isoformat(),
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "LineageNode":
        """Build a node from a dict produced by :meth:`to_dict`.

        ``id``, ``name`` and ``node_type`` are required. Other keys fall back
        to defaults, and missing timestamps become ``datetime.now()``.
        ``schema`` is copied so the node does not share state with ``data``.

        Raises:
            KeyError: If a required key is missing.
            ValueError: If ``node_type`` is not a valid ``NodeType`` value.
        """

        def _timestamp(key: str) -> datetime:
            raw = data.get(key)
            return datetime.fromisoformat(raw) if raw else datetime.now()

        column_lineage = tuple(
            ColumnLineage(
                column=cl["column"],
                source_columns=tuple(
                    (sc["node"], sc["column"])
                    for sc in cl.get("source_columns", [])
                ),
                transformation=cl.get("transformation", ""),
                dtype=cl.get("dtype", ""),
            )
            for cl in data.get("column_lineage", [])
        )

        return cls(
            id=data["id"],
            name=data["name"],
            node_type=NodeType(data["node_type"]),
            location=data.get("location", ""),
            # ``or`` also covers an explicit ``None`` (JSON ``null``).
            schema=dict(data.get("schema") or {}),
            metadata=LineageMetadata.from_dict(data.get("metadata", {})),
            column_lineage=column_lineage,
            created_at=_timestamp("created_at"),
            updated_at=_timestamp("updated_at"),
        )
271
+
272
+
273
@dataclass
class LineageEdge:
    """A directed relationship between two lineage nodes.

    Describes how ``target`` relates to ``source`` (e.g., derivation,
    transformation, validation). Two edges are equal, and hash alike, when
    their source, target and edge type match.

    Attributes:
        source: Source node ID
        target: Target node ID
        edge_type: Type of relationship
        operation: Operation that created this relationship
        metadata: Additional metadata about the relationship
        created_at: Creation timestamp (defaults to ``datetime.now()``)
    """

    source: str
    target: str
    edge_type: EdgeType
    operation: OperationType = OperationType.TRANSFORM
    metadata: LineageMetadata = field(default_factory=LineageMetadata)
    created_at: datetime = field(default_factory=datetime.now)

    def __hash__(self) -> int:
        identity = (self.source, self.target, self.edge_type)
        return hash(identity)

    def __eq__(self, other: object) -> bool:
        if isinstance(other, LineageEdge):
            mine = (self.source, self.target, self.edge_type)
            theirs = (other.source, other.target, other.edge_type)
            return mine == theirs
        return False

    def to_dict(self) -> dict[str, Any]:
        """Serialize to a plain dict with enum values and an ISO-8601 timestamp."""
        payload: dict[str, Any] = {
            "source": self.source,
            "target": self.target,
        }
        payload["edge_type"] = self.edge_type.value
        payload["operation"] = self.operation.value
        payload["metadata"] = self.metadata.to_dict()
        payload["created_at"] = self.created_at.isoformat()
        return payload

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "LineageEdge":
        """Build an edge from a dict produced by :meth:`to_dict`.

        ``source``, ``target`` and ``edge_type`` are required; ``operation``
        defaults to ``"transform"`` and a missing timestamp becomes
        ``datetime.now()``.
        """
        raw_created = data.get("created_at")
        if raw_created:
            created = datetime.fromisoformat(data["created_at"])
        else:
            created = datetime.now()

        return cls(
            source=data["source"],
            target=data["target"],
            edge_type=EdgeType(data["edge_type"]),
            operation=OperationType(data.get("operation", "transform")),
            metadata=LineageMetadata.from_dict(data.get("metadata", {})),
            created_at=created,
        )
327
+
328
+
329
+ # =============================================================================
330
+ # Lineage Graph
331
+ # =============================================================================
332
+
333
+
334
+ class LineageGraph:
335
+ """A directed acyclic graph representing data lineage.
336
+
337
+ Provides methods for:
338
+ - Adding/removing nodes and edges
339
+ - Querying lineage (upstream/downstream)
340
+ - Detecting cycles
341
+ - Serialization/deserialization
342
+ - Graph traversal
343
+
344
+ Example:
345
+ >>> graph = LineageGraph()
346
+ >>> graph.add_node(LineageNode(id="raw", name="Raw Data", node_type=NodeType.SOURCE))
347
+ >>> graph.add_node(LineageNode(id="clean", name="Clean Data", node_type=NodeType.TRANSFORMATION))
348
+ >>> graph.add_edge(LineageEdge(source="raw", target="clean", edge_type=EdgeType.TRANSFORMED_TO))
349
+ >>> print(graph.get_downstream("raw"))
350
+ """
351
+
352
def __init__(self, config: LineageConfig | None = None):
    """Create an empty lineage graph.

    Args:
        config: Optional configuration; a default ``LineageConfig`` is
            created when none (or a falsy value) is given.
    """
    self._config = config if config else LineageConfig()

    # Node and edge storage.
    self._nodes: dict[str, LineageNode] = {}
    self._edges: list[LineageEdge] = []

    # Forward (source -> targets) and backward (target -> sources) indexes.
    self._adjacency: dict[str, list[str]] = {}
    self._reverse_adjacency: dict[str, list[str]] = {}

    # Re-entrant lock, so a method holding it can call another that takes it.
    self._lock = threading.RLock()
364
+
365
@property
def config(self) -> LineageConfig:
    """The configuration object this graph was created with."""
    current = self._config
    return current
368
+
369
def add_node(self, node: LineageNode) -> None:
    """Add a node to the graph, or replace an existing node with the same ID.

    Re-adding an ID does not raise: the stored node is replaced and the
    edges already attached to that ID are kept.

    Args:
        node: Node to add or replace
    """
    with self._lock:
        self._nodes[node.id] = node
        # setdefault creates empty adjacency lists for a new ID but leaves
        # the lists of an existing node untouched.
        self._adjacency.setdefault(node.id, [])
        self._reverse_adjacency.setdefault(node.id, [])
386
+
387
def remove_node(self, node_id: str) -> LineageNode:
    """Remove a node and all its edges.

    Args:
        node_id: ID of node to remove

    Returns:
        Removed node

    Raises:
        NodeNotFoundError: If node not found
    """
    with self._lock:
        if node_id not in self._nodes:
            raise NodeNotFoundError(node_id)

        node = self._nodes.pop(node_id)

        # Drop every edge that touches the node, in either direction.
        self._edges = [
            edge for edge in self._edges
            if edge.source != node_id and edge.target != node_id
        ]

        # Drop the node's own adjacency entries.
        self._adjacency.pop(node_id, None)
        self._reverse_adjacency.pop(node_id, None)

        # Purge ALL references from the neighbours' lists. A neighbour can
        # list the node more than once (parallel edges with different edge
        # types), so a single list.remove() would leave a stale entry.
        for index in (self._adjacency, self._reverse_adjacency):
            for neighbours in index.values():
                if node_id in neighbours:
                    neighbours[:] = [n for n in neighbours if n != node_id]

        return node
425
+
426
def get_node(self, node_id: str) -> LineageNode:
    """Look up a node by its ID.

    Args:
        node_id: Node ID

    Returns:
        The node stored under ``node_id``

    Raises:
        NodeNotFoundError: If node not found
    """
    with self._lock:
        registry = self._nodes
        if node_id in registry:
            return registry[node_id]
        raise NodeNotFoundError(node_id)
442
+
443
def has_node(self, node_id: str) -> bool:
    """Return True when a node with this ID is present in the graph."""
    with self._lock:
        present = node_id in self._nodes
    return present
447
+
448
def add_edge(self, edge: LineageEdge) -> None:
    """Add an edge between two existing nodes.

    Adding an edge equal to one already stored is a no-op. The cycle check
    runs before the duplicate check.

    Args:
        edge: Edge to add

    Raises:
        NodeNotFoundError: If source or target node not found
        CyclicDependencyError: If edge would create a cycle
    """
    with self._lock:
        # Both endpoints must exist; source is checked first.
        for endpoint in (edge.source, edge.target):
            if endpoint not in self._nodes:
                raise NodeNotFoundError(endpoint)

        # Refuse edges that would create a cycle.
        if self._would_create_cycle(edge.source, edge.target):
            raise CyclicDependencyError(
                self._find_cycle(edge.source, edge.target)
            )

        # Edge already exists: nothing to do.
        if any(existing == edge for existing in self._edges):
            return

        self._edges.append(edge)
        self._adjacency[edge.source].append(edge.target)
        self._reverse_adjacency[edge.target].append(edge.source)
477
+
478
def remove_edge(self, source: str, target: str) -> LineageEdge | None:
    """Remove the first stored edge going from ``source`` to ``target``.

    Only one edge is removed per call, regardless of its edge type, and one
    matching entry is dropped from each adjacency index.

    Args:
        source: Source node ID
        target: Target node ID

    Returns:
        Removed edge or None if not found
    """
    with self._lock:
        position = next(
            (
                idx
                for idx, candidate in enumerate(self._edges)
                if candidate.source == source and candidate.target == target
            ),
            None,
        )
        if position is None:
            return None

        removed = self._edges.pop(position)

        forward = self._adjacency.get(source, [])
        if target in forward:
            forward.remove(target)

        backward = self._reverse_adjacency.get(target, [])
        if source in backward:
            backward.remove(source)

        return removed
498
+
499
+ def get_upstream(self, node_id: str, depth: int = -1) -> list[LineageNode]:
500
+ """Get all upstream (parent) nodes.
501
+
502
+ Args:
503
+ node_id: Starting node ID
504
+ depth: Maximum depth (-1 for unlimited)
505
+
506
+ Returns:
507
+ List of upstream nodes
508
+ """
509
+ with self._lock:
510
+ if node_id not in self._nodes:
511
+ raise NodeNotFoundError(node_id)
512
+
513
+ visited: set[str] = set()
514
+ result: list[LineageNode] = []
515
+ self._traverse_upstream(node_id, visited, result, depth, 0)
516
+ return result
517
+
518
+ def _traverse_upstream(
519
+ self,
520
+ node_id: str,
521
+ visited: set[str],
522
+ result: list[LineageNode],
523
+ max_depth: int,
524
+ current_depth: int,
525
+ ) -> None:
526
+ """Recursive upstream traversal."""
527
+ if max_depth != -1 and current_depth >= max_depth:
528
+ return
529
+
530
+ for parent_id in self._reverse_adjacency.get(node_id, []):
531
+ if parent_id not in visited:
532
+ visited.add(parent_id)
533
+ result.append(self._nodes[parent_id])
534
+ self._traverse_upstream(
535
+ parent_id, visited, result, max_depth, current_depth + 1
536
+ )
537
+
538
+ def get_downstream(self, node_id: str, depth: int = -1) -> list[LineageNode]:
539
+ """Get all downstream (child) nodes.
540
+
541
+ Args:
542
+ node_id: Starting node ID
543
+ depth: Maximum depth (-1 for unlimited)
544
+
545
+ Returns:
546
+ List of downstream nodes
547
+ """
548
+ with self._lock:
549
+ if node_id not in self._nodes:
550
+ raise NodeNotFoundError(node_id)
551
+
552
+ visited: set[str] = set()
553
+ result: list[LineageNode] = []
554
+ self._traverse_downstream(node_id, visited, result, depth, 0)
555
+ return result
556
+
557
+ def _traverse_downstream(
558
+ self,
559
+ node_id: str,
560
+ visited: set[str],
561
+ result: list[LineageNode],
562
+ max_depth: int,
563
+ current_depth: int,
564
+ ) -> None:
565
+ """Recursive downstream traversal."""
566
+ if max_depth != -1 and current_depth >= max_depth:
567
+ return
568
+
569
+ for child_id in self._adjacency.get(node_id, []):
570
+ if child_id not in visited:
571
+ visited.add(child_id)
572
+ result.append(self._nodes[child_id])
573
+ self._traverse_downstream(
574
+ child_id, visited, result, max_depth, current_depth + 1
575
+ )
576
+
577
+ def get_edges_for_node(
578
+ self, node_id: str, direction: str = "both"
579
+ ) -> list[LineageEdge]:
580
+ """Get all edges connected to a node.
581
+
582
+ Args:
583
+ node_id: Node ID
584
+ direction: 'incoming', 'outgoing', or 'both'
585
+
586
+ Returns:
587
+ List of edges
588
+ """
589
+ with self._lock:
590
+ edges = []
591
+ for edge in self._edges:
592
+ if direction in ("outgoing", "both") and edge.source == node_id:
593
+ edges.append(edge)
594
+ elif direction in ("incoming", "both") and edge.target == node_id:
595
+ edges.append(edge)
596
+ return edges
597
+
598
+ def _would_create_cycle(self, source: str, target: str) -> bool:
599
+ """Check if adding an edge would create a cycle."""
600
+ # If target can reach source, adding source->target creates a cycle
601
+ visited: set[str] = set()
602
+ return self._can_reach(target, source, visited)
603
+
604
+ def _can_reach(
605
+ self, start: str, end: str, visited: set[str]
606
+ ) -> bool:
607
+ """Check if end is reachable from start."""
608
+ if start == end:
609
+ return True
610
+ if start in visited:
611
+ return False
612
+ visited.add(start)
613
+ for neighbor in self._adjacency.get(start, []):
614
+ if self._can_reach(neighbor, end, visited):
615
+ return True
616
+ return False
617
+
618
+ def _find_cycle(self, source: str, target: str) -> list[str]:
619
+ """Find the cycle path if adding source->target creates one."""
620
+ path = [target]
621
+ self._find_path(target, source, path)
622
+ path.append(source)
623
+ path.append(target)
624
+ return path
625
+
626
+ def _find_path(
627
+ self, start: str, end: str, path: list[str]
628
+ ) -> bool:
629
+ """Find path from start to end."""
630
+ for neighbor in self._adjacency.get(start, []):
631
+ if neighbor == end:
632
+ return True
633
+ if neighbor not in path:
634
+ path.append(neighbor)
635
+ if self._find_path(neighbor, end, path):
636
+ return True
637
+ path.pop()
638
+ return False
639
+
640
+ def get_roots(self) -> list[LineageNode]:
641
+ """Get all root nodes (no incoming edges)."""
642
+ with self._lock:
643
+ roots = []
644
+ for node_id, node in self._nodes.items():
645
+ if not self._reverse_adjacency.get(node_id):
646
+ roots.append(node)
647
+ return roots
648
+
649
+ def get_leaves(self) -> list[LineageNode]:
650
+ """Get all leaf nodes (no outgoing edges)."""
651
+ with self._lock:
652
+ leaves = []
653
+ for node_id, node in self._nodes.items():
654
+ if not self._adjacency.get(node_id):
655
+ leaves.append(node)
656
+ return leaves
657
+
658
+ def topological_sort(self) -> list[LineageNode]:
659
+ """Return nodes in topological order."""
660
+ with self._lock:
661
+ in_degree: dict[str, int] = {
662
+ node_id: len(self._reverse_adjacency.get(node_id, []))
663
+ for node_id in self._nodes
664
+ }
665
+
666
+ queue = [
667
+ node_id for node_id, degree in in_degree.items()
668
+ if degree == 0
669
+ ]
670
+ result = []
671
+
672
+ while queue:
673
+ node_id = queue.pop(0)
674
+ result.append(self._nodes[node_id])
675
+
676
+ for child_id in self._adjacency.get(node_id, []):
677
+ in_degree[child_id] -= 1
678
+ if in_degree[child_id] == 0:
679
+ queue.append(child_id)
680
+
681
+ return result
682
+
683
+ @property
684
+ def nodes(self) -> list[LineageNode]:
685
+ """Get all nodes."""
686
+ with self._lock:
687
+ return list(self._nodes.values())
688
+
689
+ @property
690
+ def edges(self) -> list[LineageEdge]:
691
+ """Get all edges."""
692
+ with self._lock:
693
+ return list(self._edges)
694
+
695
+ @property
696
+ def node_count(self) -> int:
697
+ """Get number of nodes."""
698
+ return len(self._nodes)
699
+
700
+ @property
701
+ def edge_count(self) -> int:
702
+ """Get number of edges."""
703
+ return len(self._edges)
704
+
705
+ def to_dict(self) -> dict[str, Any]:
706
+ """Serialize graph to dictionary."""
707
+ with self._lock:
708
+ return {
709
+ "nodes": [node.to_dict() for node in self._nodes.values()],
710
+ "edges": [edge.to_dict() for edge in self._edges],
711
+ }
712
+
713
+ @classmethod
714
+ def from_dict(cls, data: dict[str, Any], config: LineageConfig | None = None) -> "LineageGraph":
715
+ """Deserialize graph from dictionary."""
716
+ graph = cls(config)
717
+ for node_data in data.get("nodes", []):
718
+ graph.add_node(LineageNode.from_dict(node_data))
719
+ for edge_data in data.get("edges", []):
720
+ graph.add_edge(LineageEdge.from_dict(edge_data))
721
+ return graph
722
+
723
+ def save(self, path: str | Path) -> None:
724
+ """Save graph to file."""
725
+ path = Path(path)
726
+ path.parent.mkdir(parents=True, exist_ok=True)
727
+ with open(path, "w") as f:
728
+ json.dump(self.to_dict(), f, indent=2)
729
+
730
+ @classmethod
731
+ def load(cls, path: str | Path, config: LineageConfig | None = None) -> "LineageGraph":
732
+ """Load graph from file."""
733
+ with open(path) as f:
734
+ data = json.load(f)
735
+ return cls.from_dict(data, config)
736
+
737
+ def clear(self) -> None:
738
+ """Clear all nodes and edges."""
739
+ with self._lock:
740
+ self._nodes.clear()
741
+ self._edges.clear()
742
+ self._adjacency.clear()
743
+ self._reverse_adjacency.clear()
744
+
745
+ def __repr__(self) -> str:
746
+ return f"<LineageGraph nodes={self.node_count} edges={self.edge_count}>"