truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877) hide show
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,775 @@
1
+ """Concurrent index management for filesystem stores.
2
+
3
+ This module provides thread-safe and process-safe index operations
4
+ for the filesystem store. The index maintains metadata about stored
5
+ items for fast lookups without reading the actual files.
6
+
7
+ Key features:
8
+ - MVCC-like reads (consistent snapshots)
9
+ - Write-ahead logging for durability
10
+ - Automatic index recovery
11
+ - Transaction support for batch updates
12
+
13
+ Example:
14
+ >>> index = ConcurrentIndex(Path(".truthound/store"))
15
+ >>> with index.transaction() as txn:
16
+ ... txn.add("item-1", {"data_asset": "customers.csv"})
17
+ ... txn.add("item-2", {"data_asset": "orders.csv"})
18
+ ... txn.commit()
19
+ """
20
+
21
+ from __future__ import annotations
22
+
23
+ import json
24
+ import os
25
+ import threading
26
+ import time
27
+ from contextlib import contextmanager
28
+ from dataclasses import dataclass, field
29
+ from datetime import datetime
30
+ from pathlib import Path
31
+ from typing import Any, Callable, Iterator, TypeVar
32
+
33
+ from truthound.stores.concurrency.locks import LockMode
34
+ from truthound.stores.concurrency.manager import FileLockManager, get_default_manager
35
+ from truthound.stores.concurrency.atomic import atomic_write, atomic_read, AtomicFileWriter
36
+
37
+
38
+ T = TypeVar("T")
39
+
40
+
41
@dataclass
class IndexEntry:
    """One record of the store index.

    Attributes:
        item_id: Unique identifier of the indexed item.
        metadata: Arbitrary metadata stored alongside the item.
        created_at: Creation timestamp of the entry.
        updated_at: Timestamp of the most recent update.
        version: Per-entry version number (starts at 1).
    """

    item_id: str
    metadata: dict[str, Any]
    created_at: datetime = field(default_factory=datetime.now)
    updated_at: datetime = field(default_factory=datetime.now)
    version: int = 1

    def to_dict(self) -> dict[str, Any]:
        """Return a JSON-friendly dictionary; timestamps become ISO-8601 strings."""
        serialized: dict[str, Any] = {}
        serialized["item_id"] = self.item_id
        serialized["metadata"] = self.metadata
        serialized["created_at"] = self.created_at.isoformat()
        serialized["updated_at"] = self.updated_at.isoformat()
        serialized["version"] = self.version
        return serialized

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "IndexEntry":
        """Build an entry from the dictionary form produced by ``to_dict``.

        Only ``item_id`` is mandatory. Missing metadata defaults to an empty
        dict, missing timestamps default to the current time and a missing
        version defaults to 1.

        Raises:
            KeyError: If ``item_id`` is absent.
        """
        # Field lookups happen in declaration order so that a missing
        # item_id is reported before any timestamp parsing is attempted.
        identifier = data["item_id"]
        meta = data.get("metadata", {})

        if "created_at" in data:
            created = datetime.fromisoformat(data["created_at"])
        else:
            created = datetime.now()

        if "updated_at" in data:
            updated = datetime.fromisoformat(data["updated_at"])
        else:
            updated = datetime.now()

        return cls(
            item_id=identifier,
            metadata=meta,
            created_at=created,
            updated_at=updated,
            version=data.get("version", 1),
        )
83
+
84
+
85
@dataclass
class IndexSnapshot:
    """Point-in-time view of the index contents.

    Readers work against a snapshot so that they see one consistent set of
    entries (MVCC-like reads).

    Attributes:
        entries: Mapping of item ID to entry captured by this snapshot.
        version: Index version the snapshot was taken at.
        timestamp: When the snapshot object was created.
    """

    entries: dict[str, IndexEntry]
    version: int
    timestamp: datetime = field(default_factory=datetime.now)

    def __len__(self) -> int:
        """Return the number of entries in the snapshot."""
        return len(self.entries)

    def get(self, item_id: str) -> IndexEntry | None:
        """Return the entry stored under ``item_id``, or ``None`` if absent."""
        return self.entries.get(item_id)

    def contains(self, item_id: str) -> bool:
        """Return ``True`` when ``item_id`` is present in the snapshot."""
        return item_id in self.entries

    def list_ids(self) -> list[str]:
        """Return every item ID held by the snapshot as a new list."""
        return [*self.entries]

    def filter(
        self,
        predicate: Callable[[IndexEntry], bool],
    ) -> list[IndexEntry]:
        """Return the entries for which ``predicate`` evaluates truthy."""
        matches: list[IndexEntry] = []
        for candidate in self.entries.values():
            if predicate(candidate):
                matches.append(candidate)
        return matches
117
+
118
+
119
class IndexTransaction:
    """Unit of work that stages index modifications until commit.

    Changes are collected in three pending sets (adds, updates, removes)
    layered over the snapshot the transaction started from. Nothing is
    handed to the parent index until ``commit()`` is called, and
    ``rollback()`` simply discards the staged changes.

    Example:
        >>> with index.transaction() as txn:
        ...     txn.add("item-1", {"key": "value"})
        ...     txn.update("item-2", {"key": "new-value"})
        ...     txn.remove("item-3")
        ...     txn.commit()
    """

    def __init__(
        self,
        index: "ConcurrentIndex",
        snapshot: IndexSnapshot,
    ) -> None:
        """Create a transaction bound to ``index``.

        Args:
            index: Parent index that will receive the changes on commit.
            snapshot: Index state the transaction reads from.
        """
        self._index = index
        self._snapshot = snapshot
        self._pending_adds: dict[str, IndexEntry] = {}
        self._pending_updates: dict[str, IndexEntry] = {}
        self._pending_removes: set[str] = set()
        self._committed = False
        self._rolled_back = False

    def add(self, item_id: str, metadata: dict[str, Any]) -> IndexEntry:
        """Stage a brand-new entry.

        Args:
            item_id: Unique identifier for the item.
            metadata: Metadata to store.

        Returns:
            The newly created entry.

        Raises:
            ValueError: If the item is already present (in the snapshot or
                staged as an add) and has not been removed in this
                transaction.
            RuntimeError: If the transaction is no longer active.
        """
        self._check_active()

        already_known = (
            self._snapshot.contains(item_id) or item_id in self._pending_adds
        )
        # Re-adding is allowed only after the item was removed in this txn.
        if already_known and item_id not in self._pending_removes:
            raise ValueError(f"Item already exists: {item_id}")

        new_entry = IndexEntry(item_id=item_id, metadata=metadata)
        self._pending_adds[item_id] = new_entry
        self._pending_removes.discard(item_id)
        return new_entry

    def update(
        self,
        item_id: str,
        metadata: dict[str, Any],
        merge: bool = True,
    ) -> IndexEntry:
        """Stage a modification of an existing entry.

        Args:
            item_id: Item to update.
            metadata: New metadata.
            merge: When true, overlay ``metadata`` on the existing metadata;
                otherwise replace it wholesale.

        Returns:
            The updated entry (version incremented by one).

        Raises:
            KeyError: If the item is not visible in this transaction.
            RuntimeError: If the transaction is no longer active.
        """
        self._check_active()

        current = self._get_current(item_id)
        if current is None:
            raise KeyError(f"Item not found: {item_id}")

        combined = {**current.metadata, **metadata} if merge else metadata

        updated_entry = IndexEntry(
            item_id=item_id,
            metadata=combined,
            created_at=current.created_at,
            updated_at=datetime.now(),
            version=current.version + 1,
        )
        self._pending_updates[item_id] = updated_entry
        return updated_entry

    def upsert(self, item_id: str, metadata: dict[str, Any]) -> IndexEntry:
        """Stage an add when the item is unknown, otherwise an update.

        Args:
            item_id: Item identifier.
            metadata: Metadata to store.

        Returns:
            The created or updated entry.
        """
        self._check_active()

        if self._get_current(item_id) is None:
            return self.add(item_id, metadata)
        return self.update(item_id, metadata)

    def remove(self, item_id: str) -> bool:
        """Stage the removal of an entry.

        Args:
            item_id: Item to remove.

        Returns:
            True if the item was visible in this transaction before the
            call, False otherwise.
        """
        self._check_active()

        was_visible = self._get_current(item_id) is not None

        self._pending_removes.add(item_id)
        # A removal supersedes anything staged earlier for the same item.
        for staged in (self._pending_adds, self._pending_updates):
            staged.pop(item_id, None)

        return was_visible

    def get(self, item_id: str) -> IndexEntry | None:
        """Look up an entry as seen by this transaction.

        Args:
            item_id: Item to get.

        Returns:
            The entry, taking staged changes into account, or None.
        """
        return self._get_current(item_id)

    def _get_current(self, item_id: str) -> IndexEntry | None:
        """Resolve an entry through removes, updates, adds, then the snapshot."""
        if item_id in self._pending_removes:
            return None
        # Updates take precedence over adds for the same item.
        for staged in (self._pending_updates, self._pending_adds):
            if item_id in staged:
                return staged[item_id]
        return self._snapshot.get(item_id)

    def _check_active(self) -> None:
        """Raise ``RuntimeError`` if the transaction was committed or rolled back."""
        if self._committed:
            raise RuntimeError("Transaction already committed")
        if self._rolled_back:
            raise RuntimeError("Transaction rolled back")

    def commit(self) -> int:
        """Hand the staged changes to the parent index.

        The parent index is only invoked when at least one change is
        staged.

        Returns:
            Number of changes applied.

        Raises:
            RuntimeError: If transaction is not active.
        """
        self._check_active()

        change_count = self.pending_changes
        if change_count > 0:
            self._index._apply_transaction(self)

        self._committed = True
        return change_count

    def rollback(self) -> None:
        """Discard every staged change and mark the transaction rolled back."""
        self._check_active()
        for staged in (
            self._pending_adds,
            self._pending_updates,
            self._pending_removes,
        ):
            staged.clear()
        self._rolled_back = True

    @property
    def pending_changes(self) -> int:
        """Total number of staged adds, updates and removes."""
        staged_groups = (
            self._pending_adds,
            self._pending_updates,
            self._pending_removes,
        )
        return sum(len(group) for group in staged_groups)

    @property
    def is_active(self) -> bool:
        """True until the transaction is committed or rolled back."""
        return not (self._committed or self._rolled_back)
328
+
329
+
330
+ class ConcurrentIndex:
331
+ """Thread-safe and process-safe index for filesystem stores.
332
+
333
+ This class manages an index file that tracks metadata about stored
334
+ items. It provides:
335
+ - Consistent reads via snapshots
336
+ - Atomic writes via transactions
337
+ - Automatic recovery from corruption
338
+ - Write-ahead logging for durability
339
+
340
+ Example:
341
+ >>> index = ConcurrentIndex(Path(".truthound/store"))
342
+ >>>
343
+ >>> # Read operations (use snapshot)
344
+ >>> snapshot = index.snapshot()
345
+ >>> for item_id in snapshot.list_ids():
346
+ ... entry = snapshot.get(item_id)
347
+ >>>
348
+ >>> # Write operations (use transaction)
349
+ >>> with index.transaction() as txn:
350
+ ... txn.add("new-item", {"data_asset": "data.csv"})
351
+ ... txn.commit()
352
+ """
353
+
354
+ def __init__(
355
+ self,
356
+ base_path: Path | str,
357
+ index_filename: str = "_index.json",
358
+ lock_manager: FileLockManager | None = None,
359
+ wal_enabled: bool = True,
360
+ ) -> None:
361
+ """Initialize the concurrent index.
362
+
363
+ Args:
364
+ base_path: Base directory for the index.
365
+ index_filename: Name of the index file.
366
+ lock_manager: Lock manager to use.
367
+ wal_enabled: Whether to use write-ahead logging.
368
+ """
369
+ self._base_path = Path(base_path)
370
+ self._index_path = self._base_path / index_filename
371
+ self._wal_path = self._base_path / f"{index_filename}.wal"
372
+ self._lock_manager = lock_manager or get_default_manager()
373
+ self._wal_enabled = wal_enabled
374
+
375
+ # In-memory cache
376
+ self._cache: dict[str, IndexEntry] = {}
377
+ self._cache_version: int = 0
378
+ self._cache_lock = threading.RLock()
379
+ self._loaded = False
380
+
381
+ def initialize(self) -> None:
382
+ """Initialize the index, loading from disk if exists."""
383
+ with self._cache_lock:
384
+ if self._loaded:
385
+ return
386
+
387
+ self._base_path.mkdir(parents=True, exist_ok=True)
388
+
389
+ # Recover from WAL if needed
390
+ if self._wal_enabled and self._wal_path.exists():
391
+ self._recover_from_wal()
392
+
393
+ # Load index from disk or create empty
394
+ if self._index_path.exists():
395
+ self._load_from_disk()
396
+ else:
397
+ # Create empty index file
398
+ self._save_to_disk()
399
+
400
+ self._loaded = True
401
+
402
def _load_from_disk(self) -> None:
    """Replace the in-memory cache with the contents of the index file.

    Two on-disk entry shapes are accepted: the current one (a serialized
    ``IndexEntry`` containing ``item_id``) and the legacy one (a bare
    metadata dict keyed by item ID).

    Any failure to read or interpret the file is treated as corruption:
    the cache is emptied and the version reset to 0. This covers I/O
    errors and invalid JSON as well as well-formed JSON with an
    unexpected structure (non-dict payload, missing ``item_id``,
    unparseable timestamps), which would otherwise escape and make the
    index impossible to open.
    """
    try:
        content = atomic_read(self._index_path, lock_manager=self._lock_manager)
        data = json.loads(content.decode("utf-8"))

        # Parse into a staging dict so a bad entry halfway through the
        # file cannot leave the live cache partially populated.
        loaded: dict[str, IndexEntry] = {}
        for item_id, entry_data in data.get("entries", {}).items():
            if isinstance(entry_data, dict) and "item_id" not in entry_data:
                # Legacy format: the value is just the metadata.
                loaded[item_id] = IndexEntry(item_id=item_id, metadata=entry_data)
            else:
                loaded[item_id] = IndexEntry.from_dict(entry_data)

        version = data.get("version", 0)

    except (OSError, ValueError, KeyError, TypeError, AttributeError):
        # ValueError also covers json.JSONDecodeError, UnicodeDecodeError
        # and datetime.fromisoformat failures. Start fresh.
        self._cache.clear()
        self._cache_version = 0
        return

    self._cache.clear()
    self._cache.update(loaded)
    self._cache_version = version
424
+
425
def _save_to_disk(self) -> None:
    """Serialize the cached index as indented JSON and write it via ``atomic_write``.

    The document holds the cache version, a timestamp of the save and
    the dictionary form of every cached entry.
    """
    payload: dict[str, Any] = {
        "version": self._cache_version,
        "updated_at": datetime.now().isoformat(),
    }
    serialized_entries: dict[str, Any] = {}
    for item_id, entry in self._cache.items():
        serialized_entries[item_id] = entry.to_dict()
    payload["entries"] = serialized_entries

    # default=str stringifies any metadata value json cannot encode natively.
    atomic_write(
        self._index_path,
        json.dumps(payload, indent=2, default=str),
        lock_manager=self._lock_manager,
    )
441
+
442
+ def _recover_from_wal(self) -> None:
443
+ """Recover uncommitted changes from write-ahead log."""
444
+ try:
445
+ with open(self._wal_path, "r") as f:
446
+ for line in f:
447
+ line = line.strip()
448
+ if not line:
449
+ continue
450
+
451
+ operation = json.loads(line)
452
+ op_type = operation.get("type")
453
+ item_id = operation.get("item_id")
454
+
455
+ if op_type == "add" or op_type == "update":
456
+ entry_data = operation.get("entry", {})
457
+ self._cache[item_id] = IndexEntry.from_dict(entry_data)
458
+ elif op_type == "remove":
459
+ self._cache.pop(item_id, None)
460
+
461
+ # Save recovered state and remove WAL
462
+ self._cache_version += 1
463
+ self._save_to_disk()
464
+ self._wal_path.unlink(missing_ok=True)
465
+
466
+ except (json.JSONDecodeError, OSError):
467
+ # WAL corrupted, ignore
468
+ self._wal_path.unlink(missing_ok=True)
469
+
470
+ def _write_wal(self, operations: list[dict[str, Any]]) -> None:
471
+ """Write operations to write-ahead log."""
472
+ if not self._wal_enabled:
473
+ return
474
+
475
+ with open(self._wal_path, "a") as f:
476
+ for op in operations:
477
+ f.write(json.dumps(op, default=str) + "\n")
478
+ f.flush()
479
+ os.fsync(f.fileno())
480
+
481
+ def _clear_wal(self) -> None:
482
+ """Clear the write-ahead log."""
483
+ if self._wal_path.exists():
484
+ self._wal_path.unlink(missing_ok=True)
485
+
486
def snapshot(self) -> IndexSnapshot:
    """Capture the current index contents as a snapshot.

    The index is initialized first if needed. The snapshot holds its own
    copy of the ID-to-entry mapping (a shallow copy taken under the cache
    lock), so later additions or removals in the index do not alter it.

    Returns:
        IndexSnapshot with current state.
    """
    self.initialize()

    with self._cache_lock:
        entries_copy = dict(self._cache)
        version_now = self._cache_version
        return IndexSnapshot(entries=entries_copy, version=version_now)
502
+
503
def begin_transaction(self) -> IndexTransaction:
    """Open a transaction based on a fresh snapshot of the index.

    Unlike ``transaction()``, no context manager is involved: the caller
    must finish the transaction with ``commit()`` or ``rollback()``.

    Returns:
        IndexTransaction for making changes.

    Example:
        >>> txn = index.begin_transaction()
        >>> try:
        ...     txn.add("item", {"key": "value"})
        ...     txn.commit()
        ... except Exception:
        ...     txn.rollback()
        ...     raise
    """
    self.initialize()
    return IndexTransaction(self, self.snapshot())
522
+
523
@contextmanager
def transaction(self) -> Iterator[IndexTransaction]:
    """Run a batch of index updates inside a managed transaction.

    On normal exit the transaction is committed automatically if it is
    still active and has pending changes. A transaction that is still
    active but has no pending changes is left as it is. If the body is
    interrupted by any exception, including ``KeyboardInterrupt`` and
    ``SystemExit``, a still-active transaction is rolled back and the
    exception is re-raised unchanged.

    Yields:
        IndexTransaction for making changes.

    Example:
        >>> with index.transaction() as txn:
        ...     txn.add("item", {"key": "value"})
        ...     txn.commit()
    """
    txn = self.begin_transaction()

    try:
        yield txn
    except BaseException:
        # BaseException rather than Exception: an interrupt must not
        # leave the transaction open without a rollback.
        if txn.is_active:
            txn.rollback()
        raise

    # Reached only when the body finished without raising.
    if txn.is_active and txn.pending_changes > 0:
        # Auto-commit if not explicitly committed/rolled back
        txn.commit()
547
+
548
def _apply_transaction(self, txn: IndexTransaction) -> None:
    """Commit a transaction's staged changes to the index.

    Runs under the index write lock and the cache lock, in this order:

    1. Conflict check. If the index version differs from the version of
       the transaction's snapshot, each id staged for update or removal
       is compared between that snapshot and the current state. Ids
       staged only as adds are not checked.
    2. The staged adds, updates and removes are written to the WAL.
    3. The same changes are applied to the in-memory cache and the cache
       version is incremented.
    4. The index is saved to disk and the WAL is cleared.

    NOTE(review): snapshot() takes ``self._cache_lock`` while it is
    already held here, so the lock presumably has to be reentrant --
    confirm against the constructor.

    Args:
        txn: The transaction to apply.

    Raises:
        RuntimeError: If an id staged for update or removal was added,
            removed or modified since the transaction's snapshot.
    """
    with self._lock_manager.write_lock(self._index_path), self._cache_lock:
        # Check for conflicts (optimistic concurrency)
        live = self.snapshot()
        base = txn._snapshot
        if live.version != base.version:
            # Only entries this transaction updates or removes matter.
            for item_id in [*txn._pending_updates.keys(), *txn._pending_removes]:
                before = base.get(item_id)
                after = live.get(item_id)

                if before is None:
                    if after is not None:
                        raise RuntimeError(
                            f"Conflict: {item_id} was added concurrently"
                        )
                elif after is None:
                    raise RuntimeError(
                        f"Conflict: {item_id} was removed concurrently"
                    )
                elif before.version != after.version:
                    raise RuntimeError(
                        f"Conflict: {item_id} was modified concurrently"
                    )

        # Write to the WAL before touching the cache.
        journal = [
            {"type": "add", "item_id": item_id, "entry": entry.to_dict()}
            for item_id, entry in txn._pending_adds.items()
        ]
        journal.extend(
            {"type": "update", "item_id": item_id, "entry": entry.to_dict()}
            for item_id, entry in txn._pending_updates.items()
        )
        journal.extend(
            {"type": "remove", "item_id": item_id}
            for item_id in txn._pending_removes
        )

        if journal:
            self._write_wal(journal)

        # Apply to the cache: adds, then updates, then removes.
        cache = self._cache
        for item_id, entry in txn._pending_adds.items():
            cache[item_id] = entry
        for item_id, entry in txn._pending_updates.items():
            cache[item_id] = entry
        for item_id in txn._pending_removes:
            cache.pop(item_id, None)

        self._cache_version += 1

        # Persist to disk, then drop the WAL once the save has returned.
        self._save_to_disk()
        self._clear_wal()
614
+
615
+ # Convenience methods for simple operations
616
+
617
def add(self, item_id: str, metadata: dict[str, Any]) -> IndexEntry:
    """Add one entry in its own transaction.

    Args:
        item_id: Identifier of the new item.
        metadata: Metadata to store for the item.

    Returns:
        The entry produced by the transaction's ``add``.
    """
    with self.transaction() as txn:
        created = txn.add(item_id, metadata)
        txn.commit()
        return created
631
+
632
def update(
    self,
    item_id: str,
    metadata: dict[str, Any],
    merge: bool = True,
) -> IndexEntry:
    """Update one entry in its own transaction.

    Args:
        item_id: Identifier of the item to update.
        metadata: New metadata.
        merge: Forwarded to the transaction's ``update``; whether to
            merge with the existing metadata.

    Returns:
        The entry produced by the transaction's ``update``.
    """
    with self.transaction() as txn:
        updated = txn.update(item_id, metadata, merge=merge)
        txn.commit()
        return updated
652
+
653
def upsert(self, item_id: str, metadata: dict[str, Any]) -> IndexEntry:
    """Add or update one entry in its own transaction.

    Args:
        item_id: Identifier of the item.
        metadata: Metadata to store for the item.

    Returns:
        The entry produced by the transaction's ``upsert``.
    """
    with self.transaction() as txn:
        stored = txn.upsert(item_id, metadata)
        txn.commit()
        return stored
667
+
668
def remove(self, item_id: str) -> bool:
    """Remove one entry in its own transaction.

    Args:
        item_id: Identifier of the item to remove.

    Returns:
        The value returned by the transaction's ``remove``: True if the
        item existed.
    """
    with self.transaction() as txn:
        existed = txn.remove(item_id)
        txn.commit()
        return existed
681
+
682
def get(self, item_id: str) -> IndexEntry | None:
    """Look up an entry by ID in a fresh snapshot.

    Args:
        item_id: Identifier of the item to fetch.

    Returns:
        The entry if found, None otherwise.
    """
    current = self.snapshot()
    return current.get(item_id)
692
+
693
def contains(self, item_id: str) -> bool:
    """Report whether an entry exists, based on a fresh snapshot.

    Args:
        item_id: Identifier of the item to check.

    Returns:
        True if the entry exists.
    """
    current = self.snapshot()
    return current.contains(item_id)
703
+
704
def list_ids(self) -> list[str]:
    """Return the IDs of all items in a fresh snapshot.

    Returns:
        List of item IDs.
    """
    current = self.snapshot()
    return current.list_ids()
711
+
712
def count(self) -> int:
    """Return the number of entries in a fresh snapshot.

    Returns:
        Entry count.
    """
    current = self.snapshot()
    return len(current)
719
+
720
def clear(self) -> int:
    """Remove every entry listed in a snapshot taken at call time.

    No transaction is opened when that snapshot is empty.

    Returns:
        Number of entries in the snapshot, i.e. the number of removals
        requested.
    """
    before = self.snapshot()
    total = len(before)

    if not total:
        return total

    with self.transaction() as txn:
        for item_id in before.list_ids():
            txn.remove(item_id)
        txn.commit()

    return total
736
+
737
def rebuild_from_files(
    self,
    file_pattern: str,
    metadata_extractor: Callable[[Path], tuple[str, dict[str, Any]] | None],
) -> int:
    """Rebuild the index from files under the index's base path.

    All current entries are staged for removal, then every file matching
    ``file_pattern`` is passed to ``metadata_extractor`` and the result
    is staged as a new entry. Everything is committed as one transaction.

    Files whose name starts with ``_`` are skipped. A file is also
    skipped when the extractor returns a falsy value, or when the
    extractor or the add raises. The rebuild stays best-effort, but each
    failure is logged with its traceback instead of being dropped
    silently.

    Args:
        file_pattern: Glob pattern, relative to the base path.
        metadata_extractor: Function returning ``(item_id, metadata)``
            for a file, or None to skip it.

    Returns:
        Number of entries rebuilt.
    """
    # Local import: the file's import block is outside this method.
    import logging

    log = logging.getLogger(__name__)

    self.initialize()

    with self.transaction() as txn:
        # Clear existing entries
        for item_id in self.list_ids():
            txn.remove(item_id)

        # Scan files
        count = 0
        for file_path in self._base_path.glob(file_pattern):
            if file_path.name.startswith("_"):
                continue

            try:
                result = metadata_extractor(file_path)
                if result:
                    item_id, metadata = result
                    txn.add(item_id, metadata)
                    count += 1
            except Exception:
                # One bad file must not abort the rebuild, but it should
                # leave a trace.
                log.warning(
                    "Skipping %s while rebuilding index",
                    file_path,
                    exc_info=True,
                )
                continue

        txn.commit()

    return count