truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877):
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,1185 @@
1
+ """Memory usage monitoring and OOM prevention.
2
+
3
+ This module provides comprehensive memory monitoring capabilities:
4
+ - Real-time memory usage tracking
5
+ - OOM (Out of Memory) risk detection and prevention
6
+ - Memory-aware batch processing
7
+ - Memory profiling for optimization
8
+ - Automatic memory cleanup triggers
9
+
10
+ Key features:
11
+ - psutil-based memory tracking
12
+ - Configurable thresholds and alerts
13
+ - Context managers for scoped monitoring
14
+ - Integration with profiling operations
15
+ - Memory leak detection
16
+
17
+ Example:
18
+ from truthound.profiler.memory import (
19
+ MemoryMonitor,
20
+ memory_guard,
21
+ MemoryTracker,
22
+ )
23
+
24
+ # Simple usage with context manager
25
+ with memory_guard(max_memory_mb=1024):
26
+ process_large_dataset(data)
27
+
28
+ # Detailed monitoring
29
+ monitor = MemoryMonitor(critical_threshold_percent=80)
30
+ monitor.start()
31
+
32
+ for batch in data_batches:
33
+ if monitor.is_critical():
34
+ break
35
+ process_batch(batch)
36
+
37
+ report = monitor.stop()
38
+ print(f"Peak memory: {report.peak_rss_mb:.1f} MB")
39
+ """
40
+
41
+ from __future__ import annotations
42
+
43
+ import gc
44
+ import os
45
+ import sys
46
+ import threading
47
+ import time
48
+ import traceback
49
+ import warnings
50
+ from abc import ABC, abstractmethod
51
+ from collections import deque
52
+ from contextlib import contextmanager
53
+ from dataclasses import dataclass, field
54
+ from datetime import datetime, timedelta
55
+ from enum import Enum
56
+ from pathlib import Path
57
+ from typing import Any, Callable, Generic, Iterator, Protocol, TypeVar
58
+
59
+ try:
60
+ import psutil
61
+ PSUTIL_AVAILABLE = True
62
+ except ImportError:
63
+ PSUTIL_AVAILABLE = False
64
+ warnings.warn(
65
+ "psutil not installed. Memory monitoring will use fallback methods. "
66
+ "Install with: pip install psutil",
67
+ UserWarning,
68
+ )
69
+
70
+
71
+ # =============================================================================
72
+ # Types and Enums
73
+ # =============================================================================
74
+
75
+
76
class MemoryUnit(str, Enum):
    """Units in which a memory size can be expressed."""

    BYTES = "bytes"
    KB = "kb"
    MB = "mb"
    GB = "gb"

    @classmethod
    def convert(
        cls,
        value: float,
        from_unit: "MemoryUnit",
        to_unit: "MemoryUnit",
    ) -> float:
        """Express ``value`` (given in ``from_unit``) in ``to_unit``.

        Units are binary: each step up the scale is a factor of 1024.

        Args:
            value: Amount of memory, measured in ``from_unit``.
            from_unit: Unit ``value`` is currently expressed in.
            to_unit: Unit to express the result in.

        Returns:
            The same amount of memory expressed in ``to_unit``.
        """
        # Bytes per unit: 1024 raised to the unit's position on the scale.
        scale = (cls.BYTES, cls.KB, cls.MB, cls.GB)
        bytes_per_unit = {unit: 1024 ** power for power, unit in enumerate(scale)}

        # Normalise to bytes, then scale to the requested unit.
        in_bytes = value * bytes_per_unit[from_unit]
        return in_bytes / bytes_per_unit[to_unit]
101
+
102
+
103
class MemoryStatus(str, Enum):
    """Status levels describing how close memory usage is to its limits."""

    # Normal usage
    OK = "ok"
    # Approaching threshold
    WARNING = "warning"
    # Near limit, action needed
    CRITICAL = "critical"
    # Immediate OOM risk
    OOM_RISK = "oom_risk"
110
+
111
+
112
class MemoryAction(str, Enum):
    """Possible responses when memory usage becomes critical."""

    NONE = "none"
    WARN = "warn"
    GC_COLLECT = "gc_collect"
    RAISE_ERROR = "raise_error"
    CALLBACK = "callback"
120
+
121
+
122
+ # =============================================================================
123
+ # Exceptions
124
+ # =============================================================================
125
+
126
+
127
class MemoryLimitExceeded(Exception):
    """Signal that memory usage went past a limit.

    Attributes:
        current_mb: Memory usage reported by the raiser, in MB.
        limit_mb: The limit that was exceeded, in MB.
    """

    def __init__(
        self,
        current_mb: float,
        limit_mb: float,
        message: str = "",
    ):
        """Store the figures and build the exception text.

        Args:
            current_mb: Memory usage in MB.
            limit_mb: Limit in MB.
            message: Custom exception text; when empty, a default message
                built from the two figures is used.
        """
        self.current_mb = current_mb
        self.limit_mb = limit_mb
        if not message:
            # No custom text supplied: describe the overrun ourselves.
            message = f"Memory limit exceeded: {current_mb:.1f} MB > {limit_mb:.1f} MB"
        super().__init__(message)
141
+
142
+
143
class OOMRiskDetected(Exception):
    """Signal that the process is at risk of running out of memory.

    Attributes:
        available_mb: Memory still available, in MB.
        required_mb: Memory the caller needs, in MB, if it was given.
    """

    def __init__(
        self,
        available_mb: float,
        required_mb: float | None = None,
    ):
        """Store the figures and build the exception text.

        Args:
            available_mb: Memory still available, in MB.
            required_mb: Memory required, in MB. Mentioned in the message
                only when it is truthy (not ``None`` and not zero).
        """
        self.available_mb = available_mb
        self.required_mb = required_mb

        fragments = [f"OOM risk: only {available_mb:.1f} MB available"]
        if required_mb:
            fragments.append(f", but {required_mb:.1f} MB required")
        super().__init__("".join(fragments))
157
+
158
+
159
+ # =============================================================================
160
+ # Memory Information
161
+ # =============================================================================
162
+
163
+
164
@dataclass(frozen=True)
class MemorySnapshot:
    """Immutable reading of process and system memory at one instant.

    Raw figures are stored in bytes; the ``*_mb`` properties expose the
    same figures in MB (1 MB = 1024 * 1024 bytes).
    """

    timestamp: datetime
    # Process-level figures
    process_rss_bytes: int  # Resident Set Size
    process_vms_bytes: int  # Virtual Memory Size
    process_percent: float  # Process memory as % of total
    # System-wide figures
    system_total_bytes: int
    system_available_bytes: int
    system_used_bytes: int
    system_percent: float

    @staticmethod
    def _bytes_to_mb(n_bytes: int) -> float:
        """Scale a byte count down to MB."""
        return n_bytes / (1024 * 1024)

    @property
    def process_rss_mb(self) -> float:
        """Resident set size of the process, in MB."""
        return self._bytes_to_mb(self.process_rss_bytes)

    @property
    def process_vms_mb(self) -> float:
        """Virtual memory size of the process, in MB."""
        return self._bytes_to_mb(self.process_vms_bytes)

    @property
    def system_available_mb(self) -> float:
        """Memory available system-wide, in MB."""
        return self._bytes_to_mb(self.system_available_bytes)

    @property
    def system_total_mb(self) -> float:
        """Total system memory, in MB."""
        return self._bytes_to_mb(self.system_total_bytes)

    def to_dict(self) -> dict[str, Any]:
        """Return the snapshot as a nested dict.

        The timestamp is rendered as an ISO-8601 string; process and
        system figures are grouped under ``"process"`` and ``"system"``.
        """
        payload: dict[str, Any] = {"timestamp": self.timestamp.isoformat()}
        payload["process"] = {
            "rss_bytes": self.process_rss_bytes,
            "rss_mb": self.process_rss_mb,
            "vms_bytes": self.process_vms_bytes,
            "vms_mb": self.process_vms_mb,
            "percent": self.process_percent,
        }
        payload["system"] = {
            "total_bytes": self.system_total_bytes,
            "total_mb": self.system_total_mb,
            "available_bytes": self.system_available_bytes,
            "available_mb": self.system_available_mb,
            "used_bytes": self.system_used_bytes,
            "percent": self.system_percent,
        }
        return payload
217
+
218
+
219
@dataclass
class MemoryReport:
    """Summary of memory usage over one monitoring period."""

    # Monitoring window
    start_time: datetime
    end_time: datetime
    duration_seconds: float

    # Process RSS statistics, in MB
    initial_rss_mb: float
    final_rss_mb: float
    peak_rss_mb: float
    min_rss_mb: float
    avg_rss_mb: float

    # System memory usage, in percent
    initial_system_percent: float
    final_system_percent: float
    peak_system_percent: float

    # Status tracking
    status_history: list[tuple[datetime, MemoryStatus]] = field(default_factory=list)
    warnings: list[str] = field(default_factory=list)
    gc_collections: int = 0

    # Snapshots (if detailed tracking enabled)
    snapshots: list[MemorySnapshot] = field(default_factory=list)

    @property
    def memory_growth_mb(self) -> float:
        """Change in process RSS (final minus initial), in MB."""
        return self.final_rss_mb - self.initial_rss_mb

    @property
    def memory_growth_percent(self) -> float:
        """RSS growth relative to the initial RSS, in percent.

        Returns 0.0 when the initial RSS is zero, avoiding a division by
        zero.
        """
        baseline = self.initial_rss_mb
        if baseline == 0:
            return 0.0
        return (self.memory_growth_mb / baseline) * 100

    def to_dict(self) -> dict[str, Any]:
        """Return the report as a nested dict.

        Times are rendered as ISO-8601 strings. Snapshots are not
        included; only their count is reported under ``"n_snapshots"``.
        The status history is not included either.
        """
        summary: dict[str, Any] = {
            "duration_seconds": self.duration_seconds,
            "start_time": self.start_time.isoformat(),
            "end_time": self.end_time.isoformat(),
        }
        summary["process"] = {
            "initial_rss_mb": self.initial_rss_mb,
            "final_rss_mb": self.final_rss_mb,
            "peak_rss_mb": self.peak_rss_mb,
            "min_rss_mb": self.min_rss_mb,
            "avg_rss_mb": self.avg_rss_mb,
            "growth_mb": self.memory_growth_mb,
            "growth_percent": self.memory_growth_percent,
        }
        summary["system"] = {
            "initial_percent": self.initial_system_percent,
            "final_percent": self.final_system_percent,
            "peak_percent": self.peak_system_percent,
        }
        summary["gc_collections"] = self.gc_collections
        summary["warnings"] = self.warnings
        summary["n_snapshots"] = len(self.snapshots)
        return summary
283
+
284
+
285
+ # =============================================================================
286
+ # Memory Reader (Platform Abstraction)
287
+ # =============================================================================
288
+
289
+
290
class MemoryReader(Protocol):
    """Structural interface for objects that can report memory usage."""

    def get_snapshot(self) -> MemorySnapshot:
        """Return a snapshot of memory usage taken at call time."""
        ...
296
+
297
+
298
class PsutilMemoryReader:
    """Memory reader backed by psutil."""

    def __init__(self, pid: int | None = None):
        """Bind the reader to a process.

        Args:
            pid: Process ID to monitor (None for current process)

        Raises:
            ImportError: If psutil is not installed.
        """
        if PSUTIL_AVAILABLE:
            self._process = psutil.Process(pid)
        else:
            raise ImportError("psutil is required for PsutilMemoryReader")

    def get_snapshot(self) -> MemorySnapshot:
        """Read process and system memory figures and return them as a snapshot."""
        process = self._process
        process_memory = process.memory_info()
        process_share = process.memory_percent()
        system_memory = psutil.virtual_memory()

        return MemorySnapshot(
            timestamp=datetime.now(),
            process_rss_bytes=process_memory.rss,
            process_vms_bytes=process_memory.vms,
            process_percent=process_share,
            system_total_bytes=system_memory.total,
            system_available_bytes=system_memory.available,
            system_used_bytes=system_memory.used,
            system_percent=system_memory.percent,
        )
327
+
328
+
329
class FallbackMemoryReader:
    """Best-effort memory reader for when psutil is not available.

    Process memory comes from the ``resource`` module when it can be
    imported (Unix); otherwise it is a rough estimate based on a sample of
    gc-tracked objects. System-wide figures are NOT measured: they are
    derived from an assumed total memory size, so treat them as
    placeholders rather than real readings.
    """

    # Total system memory assumed when the caller does not supply one (8 GB).
    DEFAULT_TOTAL_BYTES = 8 * 1024 * 1024 * 1024

    def __init__(self, assumed_total_bytes: int | None = None) -> None:
        """Initialize reader.

        Args:
            assumed_total_bytes: Total system memory, in bytes, to assume
                when deriving system-wide figures. ``None`` uses
                ``DEFAULT_TOTAL_BYTES`` (8 GB).

        Raises:
            ValueError: If ``assumed_total_bytes`` is not positive.
        """
        if assumed_total_bytes is None:
            assumed_total_bytes = self.DEFAULT_TOTAL_BYTES
        elif assumed_total_bytes <= 0:
            # A non-positive total would make the percentage maths meaningless
            # (or divide by zero), so reject it up front.
            raise ValueError("assumed_total_bytes must be positive")
        self._total_bytes = assumed_total_bytes

        self._has_resource = False
        self._resource = None
        try:
            import resource
        except ImportError:
            # Deliberate best effort: without ``resource`` the reader falls
            # back to the gc-based estimate in get_snapshot().
            pass
        else:
            self._resource = resource
            self._has_resource = True

    def get_snapshot(self) -> MemorySnapshot:
        """Get current memory snapshot (limited without psutil).

        Returns:
            A snapshot whose VMS mirrors RSS (no VMS information is
            available) and whose system figures are derived from the
            assumed total. Available bytes never go below zero and the
            percentages never exceed 100, even if the process figure is
            larger than the assumed total.
        """
        timestamp = datetime.now()

        if self._has_resource:
            # Unix systems. NOTE: ru_maxrss is the *maximum* resident set
            # size, i.e. a high-water mark rather than the current RSS.
            usage = self._resource.getrusage(self._resource.RUSAGE_SELF)
            rss_bytes = usage.ru_maxrss
            # On macOS, ru_maxrss is in bytes; on Linux, it's in KB
            if sys.platform != "darwin":
                rss_bytes *= 1024
        else:
            # Rough estimate: shallow sizes of the first 1000 gc-tracked objects.
            rss_bytes = sum(sys.getsizeof(obj) for obj in gc.get_objects()[:1000])

        total_bytes = self._total_bytes
        # The total is only an assumption, so the process figure may exceed
        # it; clamp the derived values to keep them physically meaningful.
        percent = min(100.0, rss_bytes / total_bytes * 100)
        available_bytes = max(0, total_bytes - rss_bytes)

        return MemorySnapshot(
            timestamp=timestamp,
            process_rss_bytes=rss_bytes,
            process_vms_bytes=rss_bytes,  # No VMS info
            process_percent=percent,
            system_total_bytes=total_bytes,
            system_available_bytes=available_bytes,
            system_used_bytes=rss_bytes,
            system_percent=percent,
        )
374
+
375
+
376
def get_memory_reader(pid: int | None = None) -> MemoryReader:
    """Pick a memory reader based on whether psutil is available.

    Args:
        pid: Process ID to monitor (None for current process). Only used
            by the psutil-backed reader; the fallback reader takes no pid.

    Returns:
        A ``PsutilMemoryReader`` when psutil is available, otherwise a
        ``FallbackMemoryReader``.
    """
    if not PSUTIL_AVAILABLE:
        return FallbackMemoryReader()
    return PsutilMemoryReader(pid)
388
+
389
+
390
+ # =============================================================================
391
+ # Memory Monitor Configuration
392
+ # =============================================================================
393
+
394
+
395
@dataclass
class MemoryConfig:
    """Configuration for memory monitoring.

    Attributes:
        warning_threshold_percent: System memory % to trigger warning
        critical_threshold_percent: System memory % to trigger critical
        oom_threshold_percent: System memory % to consider OOM risk
        max_process_memory_mb: Maximum process memory allowed (None = unlimited)
        check_interval_seconds: How often to check memory
        enable_gc_on_warning: Run GC when warning threshold hit
        enable_gc_on_critical: Run GC when critical threshold hit
        raise_on_oom_risk: Raise exception on OOM risk
        callback_on_warning: Callback when warning threshold hit
        callback_on_critical: Callback when critical threshold hit
        keep_snapshots: Whether to keep all snapshots
        max_snapshots: Maximum snapshots to keep (0 = unlimited)
    """

    warning_threshold_percent: float = 70.0
    critical_threshold_percent: float = 85.0
    oom_threshold_percent: float = 95.0
    max_process_memory_mb: float | None = None

    check_interval_seconds: float = 1.0
    enable_gc_on_warning: bool = False
    enable_gc_on_critical: bool = True
    raise_on_oom_risk: bool = True

    callback_on_warning: Callable[[MemorySnapshot], None] | None = None
    callback_on_critical: Callable[[MemorySnapshot], None] | None = None

    keep_snapshots: bool = False
    max_snapshots: int = 1000

    def get_status(self, snapshot: MemorySnapshot) -> MemoryStatus:
        """Determine memory status from snapshot.

        The most severe applicable level wins:

        * ``OOM_RISK`` if system usage is at or above ``oom_threshold_percent``;
        * ``CRITICAL`` if the process RSS exceeds ``max_process_memory_mb``
          (when set) or system usage is at or above
          ``critical_threshold_percent``;
        * ``WARNING`` if system usage is at or above
          ``warning_threshold_percent``;
        * ``OK`` otherwise.

        Args:
            snapshot: Memory reading to classify.

        Returns:
            The status level for ``snapshot``.
        """
        system_percent = snapshot.system_percent

        # Check the OOM threshold first so that a process which is also over
        # its own limit cannot mask the more severe system-wide condition.
        if system_percent >= self.oom_threshold_percent:
            return MemoryStatus.OOM_RISK

        over_process_limit = (
            self.max_process_memory_mb is not None
            and snapshot.process_rss_mb > self.max_process_memory_mb
        )
        if over_process_limit or system_percent >= self.critical_threshold_percent:
            return MemoryStatus.CRITICAL

        if system_percent >= self.warning_threshold_percent:
            return MemoryStatus.WARNING
        return MemoryStatus.OK
448
+
449
+
450
+ # =============================================================================
451
+ # Memory Monitor
452
+ # =============================================================================
453
+
454
+
455
class MemoryMonitor:
    """Real-time memory usage monitor.

    Samples memory on a background daemon thread (and on demand through
    ``check()``), keeps running RSS statistics and a history of status
    transitions, and triggers the configured callbacks / garbage
    collection while memory is at warning or critical level.

    Example:
        monitor = MemoryMonitor(
            warning_threshold_percent=70,
            critical_threshold_percent=85,
        )

        monitor.start()

        # Do work...
        for batch in batches:
            if monitor.is_critical():
                print("Memory critical, stopping")
                break
            process(batch)

        report = monitor.stop()
        print(f"Peak memory: {report.peak_rss_mb:.1f} MB")
    """

    def __init__(
        self,
        config: MemoryConfig | None = None,
        warning_threshold_percent: float = 70.0,
        critical_threshold_percent: float = 85.0,
        max_process_memory_mb: float | None = None,
        check_interval_seconds: float = 1.0,
    ):
        """Initialize monitor.

        Args:
            config: Full configuration (overrides other params)
            warning_threshold_percent: Warning threshold
            critical_threshold_percent: Critical threshold
            max_process_memory_mb: Max process memory
            check_interval_seconds: Check interval
        """
        if config is not None:
            self._config = config
        else:
            self._config = MemoryConfig(
                warning_threshold_percent=warning_threshold_percent,
                critical_threshold_percent=critical_threshold_percent,
                max_process_memory_mb=max_process_memory_mb,
                check_interval_seconds=check_interval_seconds,
            )

        self._reader = get_memory_reader()
        self._running = False
        self._thread: threading.Thread | None = None
        self._lock = threading.Lock()
        # Set by stop() so the sampling thread wakes up immediately instead
        # of sleeping out the rest of its check interval.
        self._stop_event = threading.Event()

        # State
        self._snapshots: deque[MemorySnapshot] = deque(
            maxlen=self._config.max_snapshots if self._config.max_snapshots > 0 else None
        )
        self._start_time: datetime | None = None
        self._end_time: datetime | None = None
        self._initial_snapshot: MemorySnapshot | None = None
        self._peak_rss_bytes: int = 0
        # Sentinel until the first sample arrives; never reported as-is.
        self._min_rss_bytes: float = float("inf")
        self._sum_rss_bytes: int = 0
        self._sample_count: int = 0
        self._gc_count: int = 0
        self._status_history: list[tuple[datetime, MemoryStatus]] = []
        self._current_status: MemoryStatus = MemoryStatus.OK
        self._warnings: list[str] = []

    @property
    def is_running(self) -> bool:
        """Check if monitor is running."""
        return self._running

    @property
    def current_status(self) -> MemoryStatus:
        """Get current memory status."""
        return self._current_status

    def start(self) -> None:
        """Start monitoring in background thread.

        Does nothing when the monitor is already running.
        """
        if self._running:
            return

        # Take the baseline first: if the reader fails, the monitor must not
        # be left flagged as running without a sampling thread.
        baseline = self._reader.get_snapshot()

        with self._lock:
            self._start_time = datetime.now()
            self._initial_snapshot = baseline
            self._peak_rss_bytes = baseline.process_rss_bytes
            self._min_rss_bytes = baseline.process_rss_bytes

            if self._config.keep_snapshots:
                self._snapshots.append(baseline)

        self._stop_event.clear()
        self._running = True
        self._thread = threading.Thread(target=self._monitor_loop, daemon=True)
        self._thread.start()

    def stop(self) -> MemoryReport:
        """Stop monitoring and return report.

        Returns:
            Report covering the period since ``start()``.
        """
        self._running = False
        self._end_time = datetime.now()
        self._stop_event.set()

        worker = self._thread
        if worker is not None:
            # Joining the current thread raises RuntimeError, so skip the
            # join when stop() is invoked from the sampling thread itself
            # (e.g. from a callback).
            if worker is not threading.current_thread():
                worker.join(timeout=2.0)
            self._thread = None

        return self._generate_report()

    def check(self) -> MemorySnapshot:
        """Take a manual memory check.

        Returns:
            Current memory snapshot

        Raises:
            OOMRiskDetected: If the snapshot is at OOM-risk level and the
                configuration has ``raise_on_oom_risk`` set.
        """
        snapshot = self._reader.get_snapshot()
        self._process_snapshot(snapshot)
        return snapshot

    def is_ok(self) -> bool:
        """Check if memory status is OK."""
        return self._current_status == MemoryStatus.OK

    def is_warning(self) -> bool:
        """Check if memory is at warning level."""
        return self._current_status == MemoryStatus.WARNING

    def is_critical(self) -> bool:
        """Check if memory is at critical level (critical or OOM risk)."""
        return self._current_status in (
            MemoryStatus.CRITICAL,
            MemoryStatus.OOM_RISK,
        )

    def get_available_mb(self) -> float:
        """Get available system memory in MB."""
        snapshot = self._reader.get_snapshot()
        return snapshot.system_available_mb

    def get_process_memory_mb(self) -> float:
        """Get current process memory in MB."""
        snapshot = self._reader.get_snapshot()
        return snapshot.process_rss_mb

    def _monitor_loop(self) -> None:
        """Background monitoring loop.

        Any exception raised while sampling (including one raised by
        ``_process_snapshot``) is recorded as a warning so the loop keeps
        running.
        """
        while self._running:
            try:
                snapshot = self._reader.get_snapshot()
                self._process_snapshot(snapshot)
            except Exception as e:
                with self._lock:
                    self._warnings.append(f"Monitor error: {e}")

            # Interruptible sleep: returns True as soon as stop() is called.
            if self._stop_event.wait(self._config.check_interval_seconds):
                break

    def _process_snapshot(self, snapshot: MemorySnapshot) -> None:
        """Fold a snapshot into the statistics and react to its status.

        Raises:
            OOMRiskDetected: If the status is OOM risk and the configuration
                has ``raise_on_oom_risk`` set.
        """
        with self._lock:
            # Update stats
            self._peak_rss_bytes = max(self._peak_rss_bytes, snapshot.process_rss_bytes)
            self._min_rss_bytes = min(self._min_rss_bytes, snapshot.process_rss_bytes)
            self._sum_rss_bytes += snapshot.process_rss_bytes
            self._sample_count += 1

            if self._config.keep_snapshots:
                self._snapshots.append(snapshot)

            # Check status; only transitions are recorded in the history.
            new_status = self._config.get_status(snapshot)

            if new_status != self._current_status:
                self._status_history.append((snapshot.timestamp, new_status))
                self._current_status = new_status

        # Actions run outside the lock so that a user callback which calls
        # back into the monitor (e.g. check()) cannot deadlock on it.
        if new_status == MemoryStatus.WARNING:
            self._run_actions(
                self._config.callback_on_warning,
                self._config.enable_gc_on_warning,
                snapshot,
                "Warning",
            )
        elif new_status == MemoryStatus.CRITICAL:
            self._run_actions(
                self._config.callback_on_critical,
                self._config.enable_gc_on_critical,
                snapshot,
                "Critical",
            )
        elif new_status == MemoryStatus.OOM_RISK:
            if self._config.raise_on_oom_risk:
                raise OOMRiskDetected(snapshot.system_available_mb)

    def _run_actions(self, callback, run_gc, snapshot: MemorySnapshot, label: str) -> None:
        """Invoke an optional callback and an optional GC pass for a status."""
        if callback:
            try:
                callback(snapshot)
            except Exception as exc:
                # Callbacks are best-effort, but the failure is kept visible
                # in the report instead of being dropped silently.
                with self._lock:
                    self._warnings.append(f"{label} callback error: {exc}")

        if run_gc:
            gc.collect()
            with self._lock:
                self._gc_count += 1

    def _generate_report(self) -> MemoryReport:
        """Generate memory report from the accumulated state."""
        final_snapshot = self._reader.get_snapshot()
        mb = 1024 * 1024

        with self._lock:
            started = self._start_time
            ended = self._end_time
            initial = self._initial_snapshot
            # Without a baseline or any sample the minimum is still the
            # infinity sentinel; report 0.0 like the other "no data" fields.
            has_rss_data = initial is not None or self._sample_count > 0

            return MemoryReport(
                start_time=started or datetime.now(),
                end_time=ended or datetime.now(),
                duration_seconds=(
                    (ended - started).total_seconds() if started and ended else 0.0
                ),
                initial_rss_mb=initial.process_rss_mb if initial else 0.0,
                final_rss_mb=final_snapshot.process_rss_mb,
                peak_rss_mb=self._peak_rss_bytes / mb,
                min_rss_mb=self._min_rss_bytes / mb if has_rss_data else 0.0,
                avg_rss_mb=(
                    self._sum_rss_bytes / self._sample_count / mb
                    if self._sample_count > 0
                    else 0.0
                ),
                initial_system_percent=initial.system_percent if initial else 0.0,
                final_system_percent=final_snapshot.system_percent,
                peak_system_percent=(
                    max(s.system_percent for s in self._snapshots)
                    if self._snapshots
                    else final_snapshot.system_percent
                ),
                status_history=list(self._status_history),
                warnings=list(self._warnings),
                gc_collections=self._gc_count,
                snapshots=list(self._snapshots) if self._config.keep_snapshots else [],
            )
697
+
698
+
699
+ # =============================================================================
700
+ # Memory Guard Context Manager
701
+ # =============================================================================
702
+
703
+
704
@contextmanager
def memory_guard(
    max_memory_mb: float | None = None,
    warning_threshold_percent: float = 70.0,
    critical_threshold_percent: float = 85.0,
    raise_on_critical: bool = False,
    callback: Callable[[MemorySnapshot], None] | None = None,
) -> Iterator[MemoryMonitor]:
    """Context manager for memory-guarded execution.

    Starts a ``MemoryMonitor`` for the duration of the ``with`` block and
    always stops it on exit. When ``raise_on_critical`` is set and the
    block finished without raising, a ``MemoryLimitExceeded`` is raised if
    the monitor recorded a transition to critical or OOM-risk status.

    Args:
        max_memory_mb: Maximum process memory allowed
        warning_threshold_percent: Warning threshold
        critical_threshold_percent: Critical threshold
        raise_on_critical: Whether to raise on critical status
        callback: Callback on warning/critical

    Yields:
        MemoryMonitor instance

    Raises:
        MemoryLimitExceeded: On exit, if ``raise_on_critical`` is set, the
            guarded block completed normally and a critical/OOM-risk status
            was recorded.

    Example:
        with memory_guard(max_memory_mb=1024) as monitor:
            process_data(data)
            if monitor.is_critical():
                cleanup()
    """
    config = MemoryConfig(
        warning_threshold_percent=warning_threshold_percent,
        critical_threshold_percent=critical_threshold_percent,
        max_process_memory_mb=max_memory_mb,
        callback_on_warning=callback,
        callback_on_critical=callback,
    )

    monitor = MemoryMonitor(config=config)
    monitor.start()

    try:
        yield monitor
    finally:
        report = monitor.stop()

    # Deliberately outside the ``finally``: if the guarded block raised, that
    # exception propagates untouched instead of being replaced by ours.
    if raise_on_critical and any(
        status in (MemoryStatus.CRITICAL, MemoryStatus.OOM_RISK)
        for _, status in report.status_history
    ):
        raise MemoryLimitExceeded(
            report.peak_rss_mb,
            # ``is not None`` so an explicit limit of 0 is reported as 0.
            max_memory_mb if max_memory_mb is not None else float("inf"),
            f"Memory exceeded critical threshold (peak: {report.peak_rss_mb:.1f} MB)",
        )
758
+
759
+
760
+ # =============================================================================
761
+ # Memory Tracker (Lightweight)
762
+ # =============================================================================
763
+
764
+
765
class MemoryTracker:
    """Lightweight memory tracker for specific operations.

    Unlike MemoryMonitor, this doesn't run a background thread.
    Instead, it takes snapshots on demand and stores them by name.

    Example:
        tracker = MemoryTracker()

        tracker.checkpoint("start")
        do_operation()
        tracker.checkpoint("after_operation")

        print(tracker.get_delta("start", "after_operation"))
    """

    def __init__(self):
        """Initialize tracker."""
        self._reader = get_memory_reader()
        # Kept in the order the checkpoints were (most recently) taken.
        self._checkpoints: dict[str, MemorySnapshot] = {}

    def checkpoint(self, name: str) -> MemorySnapshot:
        """Take a memory checkpoint.

        Re-using an existing name replaces its snapshot and makes it the
        most recent checkpoint.

        Args:
            name: Checkpoint name

        Returns:
            Memory snapshot
        """
        snapshot = self._reader.get_snapshot()
        # Drop any previous entry first so dict order stays chronological.
        self._checkpoints.pop(name, None)
        self._checkpoints[name] = snapshot
        return snapshot

    def get_checkpoint(self, name: str) -> MemorySnapshot | None:
        """Get a checkpoint by name, or None if it was never taken."""
        return self._checkpoints.get(name)

    def get_delta(
        self,
        from_name: str,
        to_name: str,
    ) -> dict[str, float]:
        """Get memory delta between checkpoints.

        Args:
            from_name: Starting checkpoint
            to_name: Ending checkpoint

        Returns:
            Dictionary with memory deltas (``rss_delta_mb``,
            ``vms_delta_mb``, ``system_delta_percent``,
            ``duration_seconds``), or an empty dict if either checkpoint
            does not exist.
        """
        from_snap = self._checkpoints.get(from_name)
        to_snap = self._checkpoints.get(to_name)

        if from_snap is None or to_snap is None:
            return {}

        return {
            "rss_delta_mb": to_snap.process_rss_mb - from_snap.process_rss_mb,
            "vms_delta_mb": to_snap.process_vms_mb - from_snap.process_vms_mb,
            "system_delta_percent": to_snap.system_percent - from_snap.system_percent,
            "duration_seconds": (to_snap.timestamp - from_snap.timestamp).total_seconds(),
        }

    def get_all_checkpoints(self) -> dict[str, MemorySnapshot]:
        """Get a shallow copy of all checkpoints."""
        return dict(self._checkpoints)

    def clear(self) -> None:
        """Clear all checkpoints."""
        self._checkpoints.clear()

    def summary(self) -> dict[str, Any]:
        """Get summary of all checkpoints.

        Returns:
            Empty dict when no checkpoint exists; otherwise the checkpoint
            count and names, the min/max RSS seen, and the names of the
            first and last checkpoints in the order they were taken.
        """
        if not self._checkpoints:
            return {}

        names = list(self._checkpoints)
        rss_values = [snap.process_rss_mb for snap in self._checkpoints.values()]

        return {
            "n_checkpoints": len(names),
            "checkpoints": names,
            "min_rss_mb": min(rss_values),
            "max_rss_mb": max(rss_values),
            # Chronological, not alphabetical: dict order is insertion order.
            "first_checkpoint": names[0],
            "last_checkpoint": names[-1],
        }
854
+
855
+
856
+ # =============================================================================
857
+ # Memory-Aware Batch Processor
858
+ # =============================================================================
859
+
860
+
861
# Item type fed to the batch function and result type it produces.
T = TypeVar("T")
R = TypeVar("R")


class MemoryAwareBatchProcessor(Generic[T, R]):
    """Batch processor that adapts to memory constraints.

    Automatically adjusts batch size based on system memory usage: the
    batch shrinks when usage reaches ``max_memory_percent`` and grows when
    usage is below 70% of that limit.

    Example:
        processor = MemoryAwareBatchProcessor(
            process_fn=process_batch,
            max_memory_percent=80,
        )

        results = processor.process(all_items)
    """

    def __init__(
        self,
        process_fn: Callable[[list[T]], list[R]],
        initial_batch_size: int = 1000,
        min_batch_size: int = 10,
        max_batch_size: int = 100000,
        max_memory_percent: float = 80.0,
        memory_check_frequency: int = 1,
    ):
        """Initialize processor.

        Args:
            process_fn: Function to process a batch
            initial_batch_size: Starting batch size
            min_batch_size: Minimum batch size
            max_batch_size: Maximum batch size
            max_memory_percent: Maximum memory usage percent
            memory_check_frequency: Check memory every N batches

        Raises:
            ValueError: If ``initial_batch_size`` or
                ``memory_check_frequency`` is less than 1.
        """
        # A batch size below 1 would never advance through the items, and a
        # frequency of 0 would divide by zero in the modulo check.
        if initial_batch_size < 1:
            raise ValueError("initial_batch_size must be >= 1")
        if memory_check_frequency < 1:
            raise ValueError("memory_check_frequency must be >= 1")

        self.process_fn = process_fn
        self.initial_batch_size = initial_batch_size
        self.min_batch_size = min_batch_size
        self.max_batch_size = max_batch_size
        self.max_memory_percent = max_memory_percent
        self.memory_check_frequency = memory_check_frequency

        self._reader = get_memory_reader()
        self._current_batch_size = initial_batch_size
        self._batch_count = 0

    def process(
        self,
        items: list[T],
        callback: Callable[[int, int], None] | None = None,
    ) -> list[R]:
        """Process all items in adaptive batches.

        The adapted batch size and batch counter persist on the instance
        across calls.

        Args:
            items: Items to process
            callback: Progress callback (processed, total)

        Returns:
            Combined results from all batches
        """
        results: list[R] = []
        total = len(items)
        processed = 0

        while processed < total:
            # Always take at least one item so the loop is guaranteed to
            # make progress.
            step = max(1, self._current_batch_size)
            batch_end = min(processed + step, total)

            results.extend(self.process_fn(items[processed:batch_end]))

            processed = batch_end
            self._batch_count += 1

            # Report progress
            if callback:
                callback(processed, total)

            # Check memory and adjust batch size
            if self._batch_count % self.memory_check_frequency == 0:
                self._adjust_batch_size()

            # Run GC periodically
            if self._batch_count % 10 == 0:
                gc.collect()

        return results

    def _adjust_batch_size(self) -> None:
        """Adjust batch size based on memory usage."""
        current_percent = self._reader.get_snapshot().system_percent
        size = self._current_batch_size

        if current_percent >= self.max_memory_percent:
            # Reduce batch size
            new_size = max(self.min_batch_size, int(size * 0.7))
        elif current_percent < self.max_memory_percent * 0.7:
            # Can increase batch size; int() truncation would leave sizes
            # 1-3 unchanged, so grow by at least one item.
            new_size = min(self.max_batch_size, max(int(size * 1.3), size + 1))
        else:
            return

        # Never drop below one item, whatever the configured bounds are.
        self._current_batch_size = max(1, new_size)
973
+
974
+
975
+ # =============================================================================
976
+ # Memory Leak Detector
977
+ # =============================================================================
978
+
979
+
980
@dataclass
class LeakSuspect:
    """Potential memory leak information.

    One record per object type whose live-object count grew between two
    object-count samples.
    """

    # Name of the object type the counts refer to.
    type_name: str
    # Number of live objects of this type in the first sample.
    count_initial: int
    # Number of live objects of this type in the second sample.
    count_final: int
    # Growth in object count (count_final - count_initial).
    count_delta: int
    # Growth relative to count_initial, as a percentage.
    growth_percent: float
    # Optional descriptions of objects referring to the suspects; empty by default.
    sample_referrers: list[str] = field(default_factory=list)
990
+
991
+
992
class MemoryLeakDetector:
    """Detects potential memory leaks by tracking object counts.

    A baseline of per-type object counts is captured by ``start()``;
    ``detect()`` re-counts and reports the types that grew past both the
    absolute and the relative threshold.

    Example:
        detector = MemoryLeakDetector()

        detector.start()
        do_operations()
        suspects = detector.detect()

        for suspect in suspects:
            print(f"Possible leak: {suspect.type_name} +{suspect.count_delta}")
    """

    def __init__(
        self,
        min_growth_count: int = 100,
        min_growth_percent: float = 10.0,
    ):
        """Initialize detector.

        Args:
            min_growth_count: Minimum object count growth to flag
            min_growth_percent: Minimum growth percentage to flag
        """
        self._initial_counts: dict[str, int] = {}
        self.min_growth_count = min_growth_count
        self.min_growth_percent = min_growth_percent

    def start(self) -> None:
        """Collect garbage, then record the baseline object counts."""
        gc.collect()
        self._initial_counts = self._count_objects()

    def detect(self) -> list[LeakSuspect]:
        """Detect potential memory leaks.

        Returns:
            List of suspected leaks, largest count growth first
        """
        gc.collect()
        baseline = self._initial_counts
        flagged = []

        for name, now in self._count_objects().items():
            before = baseline.get(name, 0)
            grown = now - before

            if grown < self.min_growth_count:
                continue

            # Types absent from the baseline are treated as 100% growth.
            percent = (grown / before) * 100 if before > 0 else 100.0

            if percent < self.min_growth_percent:
                continue

            flagged.append(
                LeakSuspect(
                    type_name=name,
                    count_initial=before,
                    count_final=now,
                    count_delta=grown,
                    growth_percent=percent,
                )
            )

        # Largest growth first; the sort is stable, so ties keep their order.
        return sorted(flagged, key=lambda suspect: suspect.count_delta, reverse=True)

    def _count_objects(self) -> dict[str, int]:
        """Count the objects tracked by the garbage collector, by type name."""
        tally: dict[str, int] = {}

        for tracked in gc.get_objects():
            try:
                label = type(tracked).__name__
                if label in tally:
                    tally[label] += 1
                else:
                    tally[label] = 1
            except Exception:
                continue

        return tally
1076
+
1077
+
1078
+ # =============================================================================
1079
+ # Convenience Functions
1080
+ # =============================================================================
1081
+
1082
+
1083
def get_memory_usage() -> dict[str, float]:
    """Get current memory usage.

    Takes one snapshot from the memory reader and exposes its process and
    system figures as a plain dictionary.

    Returns:
        Dictionary with memory information
    """
    current = get_memory_reader().get_snapshot()
    reported_fields = (
        "process_rss_mb",
        "process_vms_mb",
        "process_percent",
        "system_available_mb",
        "system_total_mb",
        "system_percent",
    )
    return {name: getattr(current, name) for name in reported_fields}
1100
+
1101
+
1102
def check_memory_available(
    required_mb: float,
    safety_margin: float = 0.2,
) -> bool:
    """Check if enough memory is available.

    Args:
        required_mb: Required memory in MB
        safety_margin: Safety margin (0.2 = 20% buffer)

    Returns:
        True if the available system memory covers the requirement plus
        the safety margin
    """
    free_mb = get_memory_reader().get_snapshot().system_available_mb
    # Inflate the requirement by the margin before comparing.
    needed_mb = required_mb * (1 + safety_margin)
    return free_mb >= needed_mb
1122
+
1123
+
1124
def estimate_batch_size(
    item_size_bytes: int,
    target_memory_mb: float = 100,
) -> int:
    """Estimate optimal batch size for given item size.

    Args:
        item_size_bytes: Size of each item in bytes (must be positive)
        target_memory_mb: Target memory usage per batch

    Returns:
        Recommended batch size (at least 1)

    Raises:
        ValueError: If ``item_size_bytes`` is not positive.
    """
    # A zero size used to surface as a bare ZeroDivisionError and a negative
    # one silently produced 1; both are caller errors.
    if item_size_bytes <= 0:
        raise ValueError(f"item_size_bytes must be positive, got {item_size_bytes}")

    target_bytes = target_memory_mb * 1024 * 1024
    return max(1, int(target_bytes / item_size_bytes))
1139
+
1140
+
1141
def force_gc() -> dict[str, int | float]:
    """Force garbage collection and return stats.

    Runs a collection for generations 0, 1 and 2 in turn and measures the
    process RSS before and after.

    Returns:
        GC collection statistics: ``gen0``/``gen1``/``gen2`` hold the value
        returned by ``gc.collect`` for that generation, and ``freed_mb`` is
        the RSS difference in MB (a float, which may be negative if RSS
        grew in the meantime).
    """
    rss_before_mb = get_memory_usage()["process_rss_mb"]

    collected: dict[str, int | float] = {
        f"gen{generation}": gc.collect(generation) for generation in range(3)
    }

    collected["freed_mb"] = rss_before_mb - get_memory_usage()["process_rss_mb"]

    return collected
1160
+
1161
+
1162
def monitor_function(
    func: Callable[..., R],
    *args: Any,
    **kwargs: Any,
) -> tuple[R, MemoryReport]:
    """Execute a function with memory monitoring.

    A default-configured ``MemoryMonitor`` runs for the duration of the
    call and is stopped whether or not ``func`` raises; an exception from
    ``func`` propagates to the caller.

    Args:
        func: Function to execute
        *args: Function arguments
        **kwargs: Function keyword arguments

    Returns:
        Tuple of (function result, memory report)
    """
    watcher = MemoryMonitor()
    watcher.start()

    try:
        outcome = func(*args, **kwargs)
    except BaseException:
        # Stop the monitor before letting the failure propagate.
        watcher.stop()
        raise

    return outcome, watcher.stop()