truthound 1.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (877)
  1. truthound/__init__.py +162 -0
  2. truthound/adapters.py +100 -0
  3. truthound/api.py +365 -0
  4. truthound/audit/__init__.py +248 -0
  5. truthound/audit/core.py +967 -0
  6. truthound/audit/filters.py +620 -0
  7. truthound/audit/formatters.py +707 -0
  8. truthound/audit/logger.py +902 -0
  9. truthound/audit/middleware.py +571 -0
  10. truthound/audit/storage.py +1083 -0
  11. truthound/benchmark/__init__.py +123 -0
  12. truthound/benchmark/base.py +757 -0
  13. truthound/benchmark/comparison.py +635 -0
  14. truthound/benchmark/generators.py +706 -0
  15. truthound/benchmark/reporters.py +718 -0
  16. truthound/benchmark/runner.py +635 -0
  17. truthound/benchmark/scenarios.py +712 -0
  18. truthound/cache.py +252 -0
  19. truthound/checkpoint/__init__.py +136 -0
  20. truthound/checkpoint/actions/__init__.py +164 -0
  21. truthound/checkpoint/actions/base.py +324 -0
  22. truthound/checkpoint/actions/custom.py +234 -0
  23. truthound/checkpoint/actions/discord_notify.py +290 -0
  24. truthound/checkpoint/actions/email_notify.py +405 -0
  25. truthound/checkpoint/actions/github_action.py +406 -0
  26. truthound/checkpoint/actions/opsgenie.py +1499 -0
  27. truthound/checkpoint/actions/pagerduty.py +226 -0
  28. truthound/checkpoint/actions/slack_notify.py +233 -0
  29. truthound/checkpoint/actions/store_result.py +249 -0
  30. truthound/checkpoint/actions/teams_notify.py +1570 -0
  31. truthound/checkpoint/actions/telegram_notify.py +419 -0
  32. truthound/checkpoint/actions/update_docs.py +552 -0
  33. truthound/checkpoint/actions/webhook.py +293 -0
  34. truthound/checkpoint/analytics/__init__.py +147 -0
  35. truthound/checkpoint/analytics/aggregations/__init__.py +23 -0
  36. truthound/checkpoint/analytics/aggregations/rollup.py +481 -0
  37. truthound/checkpoint/analytics/aggregations/time_bucket.py +306 -0
  38. truthound/checkpoint/analytics/analyzers/__init__.py +17 -0
  39. truthound/checkpoint/analytics/analyzers/anomaly.py +386 -0
  40. truthound/checkpoint/analytics/analyzers/base.py +270 -0
  41. truthound/checkpoint/analytics/analyzers/forecast.py +421 -0
  42. truthound/checkpoint/analytics/analyzers/trend.py +314 -0
  43. truthound/checkpoint/analytics/models.py +292 -0
  44. truthound/checkpoint/analytics/protocols.py +549 -0
  45. truthound/checkpoint/analytics/service.py +718 -0
  46. truthound/checkpoint/analytics/stores/__init__.py +16 -0
  47. truthound/checkpoint/analytics/stores/base.py +306 -0
  48. truthound/checkpoint/analytics/stores/memory_store.py +353 -0
  49. truthound/checkpoint/analytics/stores/sqlite_store.py +557 -0
  50. truthound/checkpoint/analytics/stores/timescale_store.py +501 -0
  51. truthound/checkpoint/async_actions.py +794 -0
  52. truthound/checkpoint/async_base.py +708 -0
  53. truthound/checkpoint/async_checkpoint.py +617 -0
  54. truthound/checkpoint/async_runner.py +639 -0
  55. truthound/checkpoint/checkpoint.py +527 -0
  56. truthound/checkpoint/ci/__init__.py +61 -0
  57. truthound/checkpoint/ci/detector.py +355 -0
  58. truthound/checkpoint/ci/reporter.py +436 -0
  59. truthound/checkpoint/ci/templates.py +454 -0
  60. truthound/checkpoint/circuitbreaker/__init__.py +133 -0
  61. truthound/checkpoint/circuitbreaker/breaker.py +542 -0
  62. truthound/checkpoint/circuitbreaker/core.py +252 -0
  63. truthound/checkpoint/circuitbreaker/detection.py +459 -0
  64. truthound/checkpoint/circuitbreaker/middleware.py +389 -0
  65. truthound/checkpoint/circuitbreaker/registry.py +357 -0
  66. truthound/checkpoint/distributed/__init__.py +139 -0
  67. truthound/checkpoint/distributed/backends/__init__.py +35 -0
  68. truthound/checkpoint/distributed/backends/celery_backend.py +503 -0
  69. truthound/checkpoint/distributed/backends/kubernetes_backend.py +696 -0
  70. truthound/checkpoint/distributed/backends/local_backend.py +397 -0
  71. truthound/checkpoint/distributed/backends/ray_backend.py +625 -0
  72. truthound/checkpoint/distributed/base.py +774 -0
  73. truthound/checkpoint/distributed/orchestrator.py +765 -0
  74. truthound/checkpoint/distributed/protocols.py +842 -0
  75. truthound/checkpoint/distributed/registry.py +449 -0
  76. truthound/checkpoint/idempotency/__init__.py +120 -0
  77. truthound/checkpoint/idempotency/core.py +295 -0
  78. truthound/checkpoint/idempotency/fingerprint.py +454 -0
  79. truthound/checkpoint/idempotency/locking.py +604 -0
  80. truthound/checkpoint/idempotency/service.py +592 -0
  81. truthound/checkpoint/idempotency/stores.py +653 -0
  82. truthound/checkpoint/monitoring/__init__.py +134 -0
  83. truthound/checkpoint/monitoring/aggregators/__init__.py +15 -0
  84. truthound/checkpoint/monitoring/aggregators/base.py +372 -0
  85. truthound/checkpoint/monitoring/aggregators/realtime.py +300 -0
  86. truthound/checkpoint/monitoring/aggregators/window.py +493 -0
  87. truthound/checkpoint/monitoring/collectors/__init__.py +17 -0
  88. truthound/checkpoint/monitoring/collectors/base.py +257 -0
  89. truthound/checkpoint/monitoring/collectors/memory_collector.py +617 -0
  90. truthound/checkpoint/monitoring/collectors/prometheus_collector.py +451 -0
  91. truthound/checkpoint/monitoring/collectors/redis_collector.py +518 -0
  92. truthound/checkpoint/monitoring/events.py +410 -0
  93. truthound/checkpoint/monitoring/protocols.py +636 -0
  94. truthound/checkpoint/monitoring/service.py +578 -0
  95. truthound/checkpoint/monitoring/views/__init__.py +17 -0
  96. truthound/checkpoint/monitoring/views/base.py +172 -0
  97. truthound/checkpoint/monitoring/views/queue_view.py +220 -0
  98. truthound/checkpoint/monitoring/views/task_view.py +240 -0
  99. truthound/checkpoint/monitoring/views/worker_view.py +263 -0
  100. truthound/checkpoint/registry.py +337 -0
  101. truthound/checkpoint/runner.py +356 -0
  102. truthound/checkpoint/transaction/__init__.py +133 -0
  103. truthound/checkpoint/transaction/base.py +389 -0
  104. truthound/checkpoint/transaction/compensatable.py +537 -0
  105. truthound/checkpoint/transaction/coordinator.py +576 -0
  106. truthound/checkpoint/transaction/executor.py +622 -0
  107. truthound/checkpoint/transaction/idempotency.py +534 -0
  108. truthound/checkpoint/transaction/saga/__init__.py +143 -0
  109. truthound/checkpoint/transaction/saga/builder.py +584 -0
  110. truthound/checkpoint/transaction/saga/definition.py +515 -0
  111. truthound/checkpoint/transaction/saga/event_store.py +542 -0
  112. truthound/checkpoint/transaction/saga/patterns.py +833 -0
  113. truthound/checkpoint/transaction/saga/runner.py +718 -0
  114. truthound/checkpoint/transaction/saga/state_machine.py +793 -0
  115. truthound/checkpoint/transaction/saga/strategies.py +780 -0
  116. truthound/checkpoint/transaction/saga/testing.py +886 -0
  117. truthound/checkpoint/triggers/__init__.py +58 -0
  118. truthound/checkpoint/triggers/base.py +237 -0
  119. truthound/checkpoint/triggers/event.py +385 -0
  120. truthound/checkpoint/triggers/schedule.py +355 -0
  121. truthound/cli.py +2358 -0
  122. truthound/cli_modules/__init__.py +124 -0
  123. truthound/cli_modules/advanced/__init__.py +45 -0
  124. truthound/cli_modules/advanced/benchmark.py +343 -0
  125. truthound/cli_modules/advanced/docs.py +225 -0
  126. truthound/cli_modules/advanced/lineage.py +209 -0
  127. truthound/cli_modules/advanced/ml.py +320 -0
  128. truthound/cli_modules/advanced/realtime.py +196 -0
  129. truthound/cli_modules/checkpoint/__init__.py +46 -0
  130. truthound/cli_modules/checkpoint/init.py +114 -0
  131. truthound/cli_modules/checkpoint/list.py +71 -0
  132. truthound/cli_modules/checkpoint/run.py +159 -0
  133. truthound/cli_modules/checkpoint/validate.py +67 -0
  134. truthound/cli_modules/common/__init__.py +71 -0
  135. truthound/cli_modules/common/errors.py +414 -0
  136. truthound/cli_modules/common/options.py +419 -0
  137. truthound/cli_modules/common/output.py +507 -0
  138. truthound/cli_modules/common/protocol.py +552 -0
  139. truthound/cli_modules/core/__init__.py +48 -0
  140. truthound/cli_modules/core/check.py +123 -0
  141. truthound/cli_modules/core/compare.py +104 -0
  142. truthound/cli_modules/core/learn.py +57 -0
  143. truthound/cli_modules/core/mask.py +77 -0
  144. truthound/cli_modules/core/profile.py +65 -0
  145. truthound/cli_modules/core/scan.py +61 -0
  146. truthound/cli_modules/profiler/__init__.py +51 -0
  147. truthound/cli_modules/profiler/auto_profile.py +175 -0
  148. truthound/cli_modules/profiler/metadata.py +107 -0
  149. truthound/cli_modules/profiler/suite.py +283 -0
  150. truthound/cli_modules/registry.py +431 -0
  151. truthound/cli_modules/scaffolding/__init__.py +89 -0
  152. truthound/cli_modules/scaffolding/base.py +631 -0
  153. truthound/cli_modules/scaffolding/commands.py +545 -0
  154. truthound/cli_modules/scaffolding/plugins.py +1072 -0
  155. truthound/cli_modules/scaffolding/reporters.py +594 -0
  156. truthound/cli_modules/scaffolding/validators.py +1127 -0
  157. truthound/common/__init__.py +18 -0
  158. truthound/common/resilience/__init__.py +130 -0
  159. truthound/common/resilience/bulkhead.py +266 -0
  160. truthound/common/resilience/circuit_breaker.py +516 -0
  161. truthound/common/resilience/composite.py +332 -0
  162. truthound/common/resilience/config.py +292 -0
  163. truthound/common/resilience/protocols.py +217 -0
  164. truthound/common/resilience/rate_limiter.py +404 -0
  165. truthound/common/resilience/retry.py +341 -0
  166. truthound/datadocs/__init__.py +260 -0
  167. truthound/datadocs/base.py +571 -0
  168. truthound/datadocs/builder.py +761 -0
  169. truthound/datadocs/charts.py +764 -0
  170. truthound/datadocs/dashboard/__init__.py +63 -0
  171. truthound/datadocs/dashboard/app.py +576 -0
  172. truthound/datadocs/dashboard/components.py +584 -0
  173. truthound/datadocs/dashboard/state.py +240 -0
  174. truthound/datadocs/engine/__init__.py +46 -0
  175. truthound/datadocs/engine/context.py +376 -0
  176. truthound/datadocs/engine/pipeline.py +618 -0
  177. truthound/datadocs/engine/registry.py +469 -0
  178. truthound/datadocs/exporters/__init__.py +49 -0
  179. truthound/datadocs/exporters/base.py +198 -0
  180. truthound/datadocs/exporters/html.py +178 -0
  181. truthound/datadocs/exporters/json_exporter.py +253 -0
  182. truthound/datadocs/exporters/markdown.py +284 -0
  183. truthound/datadocs/exporters/pdf.py +392 -0
  184. truthound/datadocs/i18n/__init__.py +86 -0
  185. truthound/datadocs/i18n/catalog.py +960 -0
  186. truthound/datadocs/i18n/formatting.py +505 -0
  187. truthound/datadocs/i18n/loader.py +256 -0
  188. truthound/datadocs/i18n/plurals.py +378 -0
  189. truthound/datadocs/renderers/__init__.py +42 -0
  190. truthound/datadocs/renderers/base.py +401 -0
  191. truthound/datadocs/renderers/custom.py +342 -0
  192. truthound/datadocs/renderers/jinja.py +697 -0
  193. truthound/datadocs/sections.py +736 -0
  194. truthound/datadocs/styles.py +931 -0
  195. truthound/datadocs/themes/__init__.py +101 -0
  196. truthound/datadocs/themes/base.py +336 -0
  197. truthound/datadocs/themes/default.py +417 -0
  198. truthound/datadocs/themes/enterprise.py +419 -0
  199. truthound/datadocs/themes/loader.py +336 -0
  200. truthound/datadocs/themes.py +301 -0
  201. truthound/datadocs/transformers/__init__.py +57 -0
  202. truthound/datadocs/transformers/base.py +268 -0
  203. truthound/datadocs/transformers/enrichers.py +544 -0
  204. truthound/datadocs/transformers/filters.py +447 -0
  205. truthound/datadocs/transformers/i18n.py +468 -0
  206. truthound/datadocs/versioning/__init__.py +62 -0
  207. truthound/datadocs/versioning/diff.py +639 -0
  208. truthound/datadocs/versioning/storage.py +497 -0
  209. truthound/datadocs/versioning/version.py +358 -0
  210. truthound/datasources/__init__.py +223 -0
  211. truthound/datasources/_async_protocols.py +222 -0
  212. truthound/datasources/_protocols.py +159 -0
  213. truthound/datasources/adapters.py +428 -0
  214. truthound/datasources/async_base.py +599 -0
  215. truthound/datasources/async_factory.py +511 -0
  216. truthound/datasources/base.py +516 -0
  217. truthound/datasources/factory.py +433 -0
  218. truthound/datasources/nosql/__init__.py +47 -0
  219. truthound/datasources/nosql/base.py +487 -0
  220. truthound/datasources/nosql/elasticsearch.py +801 -0
  221. truthound/datasources/nosql/mongodb.py +636 -0
  222. truthound/datasources/pandas_optimized.py +582 -0
  223. truthound/datasources/pandas_source.py +216 -0
  224. truthound/datasources/polars_source.py +395 -0
  225. truthound/datasources/spark_source.py +479 -0
  226. truthound/datasources/sql/__init__.py +154 -0
  227. truthound/datasources/sql/base.py +710 -0
  228. truthound/datasources/sql/bigquery.py +410 -0
  229. truthound/datasources/sql/cloud_base.py +199 -0
  230. truthound/datasources/sql/databricks.py +471 -0
  231. truthound/datasources/sql/mysql.py +316 -0
  232. truthound/datasources/sql/oracle.py +427 -0
  233. truthound/datasources/sql/postgresql.py +321 -0
  234. truthound/datasources/sql/redshift.py +479 -0
  235. truthound/datasources/sql/snowflake.py +439 -0
  236. truthound/datasources/sql/sqlite.py +286 -0
  237. truthound/datasources/sql/sqlserver.py +437 -0
  238. truthound/datasources/streaming/__init__.py +47 -0
  239. truthound/datasources/streaming/base.py +350 -0
  240. truthound/datasources/streaming/kafka.py +670 -0
  241. truthound/decorators.py +98 -0
  242. truthound/docs/__init__.py +69 -0
  243. truthound/docs/extractor.py +971 -0
  244. truthound/docs/generator.py +601 -0
  245. truthound/docs/parser.py +1037 -0
  246. truthound/docs/renderer.py +999 -0
  247. truthound/drift/__init__.py +22 -0
  248. truthound/drift/compare.py +189 -0
  249. truthound/drift/detectors.py +464 -0
  250. truthound/drift/report.py +160 -0
  251. truthound/execution/__init__.py +65 -0
  252. truthound/execution/_protocols.py +324 -0
  253. truthound/execution/base.py +576 -0
  254. truthound/execution/distributed/__init__.py +179 -0
  255. truthound/execution/distributed/aggregations.py +731 -0
  256. truthound/execution/distributed/arrow_bridge.py +817 -0
  257. truthound/execution/distributed/base.py +550 -0
  258. truthound/execution/distributed/dask_engine.py +976 -0
  259. truthound/execution/distributed/mixins.py +766 -0
  260. truthound/execution/distributed/protocols.py +756 -0
  261. truthound/execution/distributed/ray_engine.py +1127 -0
  262. truthound/execution/distributed/registry.py +446 -0
  263. truthound/execution/distributed/spark_engine.py +1011 -0
  264. truthound/execution/distributed/validator_adapter.py +682 -0
  265. truthound/execution/pandas_engine.py +401 -0
  266. truthound/execution/polars_engine.py +497 -0
  267. truthound/execution/pushdown/__init__.py +230 -0
  268. truthound/execution/pushdown/ast.py +1550 -0
  269. truthound/execution/pushdown/builder.py +1550 -0
  270. truthound/execution/pushdown/dialects.py +1072 -0
  271. truthound/execution/pushdown/executor.py +829 -0
  272. truthound/execution/pushdown/optimizer.py +1041 -0
  273. truthound/execution/sql_engine.py +518 -0
  274. truthound/infrastructure/__init__.py +189 -0
  275. truthound/infrastructure/audit.py +1515 -0
  276. truthound/infrastructure/config.py +1133 -0
  277. truthound/infrastructure/encryption.py +1132 -0
  278. truthound/infrastructure/logging.py +1503 -0
  279. truthound/infrastructure/metrics.py +1220 -0
  280. truthound/lineage/__init__.py +89 -0
  281. truthound/lineage/base.py +746 -0
  282. truthound/lineage/impact_analysis.py +474 -0
  283. truthound/lineage/integrations/__init__.py +22 -0
  284. truthound/lineage/integrations/openlineage.py +548 -0
  285. truthound/lineage/tracker.py +512 -0
  286. truthound/lineage/visualization/__init__.py +33 -0
  287. truthound/lineage/visualization/protocols.py +145 -0
  288. truthound/lineage/visualization/renderers/__init__.py +20 -0
  289. truthound/lineage/visualization/renderers/cytoscape.py +329 -0
  290. truthound/lineage/visualization/renderers/d3.py +331 -0
  291. truthound/lineage/visualization/renderers/graphviz.py +276 -0
  292. truthound/lineage/visualization/renderers/mermaid.py +308 -0
  293. truthound/maskers.py +113 -0
  294. truthound/ml/__init__.py +124 -0
  295. truthound/ml/anomaly_models/__init__.py +31 -0
  296. truthound/ml/anomaly_models/ensemble.py +362 -0
  297. truthound/ml/anomaly_models/isolation_forest.py +444 -0
  298. truthound/ml/anomaly_models/statistical.py +392 -0
  299. truthound/ml/base.py +1178 -0
  300. truthound/ml/drift_detection/__init__.py +26 -0
  301. truthound/ml/drift_detection/concept.py +381 -0
  302. truthound/ml/drift_detection/distribution.py +361 -0
  303. truthound/ml/drift_detection/feature.py +442 -0
  304. truthound/ml/drift_detection/multivariate.py +495 -0
  305. truthound/ml/monitoring/__init__.py +88 -0
  306. truthound/ml/monitoring/alerting/__init__.py +33 -0
  307. truthound/ml/monitoring/alerting/handlers.py +427 -0
  308. truthound/ml/monitoring/alerting/rules.py +508 -0
  309. truthound/ml/monitoring/collectors/__init__.py +19 -0
  310. truthound/ml/monitoring/collectors/composite.py +105 -0
  311. truthound/ml/monitoring/collectors/drift.py +324 -0
  312. truthound/ml/monitoring/collectors/performance.py +179 -0
  313. truthound/ml/monitoring/collectors/quality.py +369 -0
  314. truthound/ml/monitoring/monitor.py +536 -0
  315. truthound/ml/monitoring/protocols.py +451 -0
  316. truthound/ml/monitoring/stores/__init__.py +15 -0
  317. truthound/ml/monitoring/stores/memory.py +201 -0
  318. truthound/ml/monitoring/stores/prometheus.py +296 -0
  319. truthound/ml/rule_learning/__init__.py +25 -0
  320. truthound/ml/rule_learning/constraint_miner.py +443 -0
  321. truthound/ml/rule_learning/pattern_learner.py +499 -0
  322. truthound/ml/rule_learning/profile_learner.py +462 -0
  323. truthound/multitenancy/__init__.py +326 -0
  324. truthound/multitenancy/core.py +852 -0
  325. truthound/multitenancy/integration.py +597 -0
  326. truthound/multitenancy/isolation.py +630 -0
  327. truthound/multitenancy/manager.py +770 -0
  328. truthound/multitenancy/middleware.py +765 -0
  329. truthound/multitenancy/quota.py +537 -0
  330. truthound/multitenancy/resolvers.py +603 -0
  331. truthound/multitenancy/storage.py +703 -0
  332. truthound/observability/__init__.py +307 -0
  333. truthound/observability/context.py +531 -0
  334. truthound/observability/instrumentation.py +611 -0
  335. truthound/observability/logging.py +887 -0
  336. truthound/observability/metrics.py +1157 -0
  337. truthound/observability/tracing/__init__.py +178 -0
  338. truthound/observability/tracing/baggage.py +310 -0
  339. truthound/observability/tracing/config.py +426 -0
  340. truthound/observability/tracing/exporter.py +787 -0
  341. truthound/observability/tracing/integration.py +1018 -0
  342. truthound/observability/tracing/otel/__init__.py +146 -0
  343. truthound/observability/tracing/otel/adapter.py +982 -0
  344. truthound/observability/tracing/otel/bridge.py +1177 -0
  345. truthound/observability/tracing/otel/compat.py +681 -0
  346. truthound/observability/tracing/otel/config.py +691 -0
  347. truthound/observability/tracing/otel/detection.py +327 -0
  348. truthound/observability/tracing/otel/protocols.py +426 -0
  349. truthound/observability/tracing/processor.py +561 -0
  350. truthound/observability/tracing/propagator.py +757 -0
  351. truthound/observability/tracing/provider.py +569 -0
  352. truthound/observability/tracing/resource.py +515 -0
  353. truthound/observability/tracing/sampler.py +487 -0
  354. truthound/observability/tracing/span.py +676 -0
  355. truthound/plugins/__init__.py +198 -0
  356. truthound/plugins/base.py +599 -0
  357. truthound/plugins/cli.py +680 -0
  358. truthound/plugins/dependencies/__init__.py +42 -0
  359. truthound/plugins/dependencies/graph.py +422 -0
  360. truthound/plugins/dependencies/resolver.py +417 -0
  361. truthound/plugins/discovery.py +379 -0
  362. truthound/plugins/docs/__init__.py +46 -0
  363. truthound/plugins/docs/extractor.py +444 -0
  364. truthound/plugins/docs/renderer.py +499 -0
  365. truthound/plugins/enterprise_manager.py +877 -0
  366. truthound/plugins/examples/__init__.py +19 -0
  367. truthound/plugins/examples/custom_validators.py +317 -0
  368. truthound/plugins/examples/slack_notifier.py +312 -0
  369. truthound/plugins/examples/xml_reporter.py +254 -0
  370. truthound/plugins/hooks.py +558 -0
  371. truthound/plugins/lifecycle/__init__.py +43 -0
  372. truthound/plugins/lifecycle/hot_reload.py +402 -0
  373. truthound/plugins/lifecycle/manager.py +371 -0
  374. truthound/plugins/manager.py +736 -0
  375. truthound/plugins/registry.py +338 -0
  376. truthound/plugins/security/__init__.py +93 -0
  377. truthound/plugins/security/exceptions.py +332 -0
  378. truthound/plugins/security/policies.py +348 -0
  379. truthound/plugins/security/protocols.py +643 -0
  380. truthound/plugins/security/sandbox/__init__.py +45 -0
  381. truthound/plugins/security/sandbox/context.py +158 -0
  382. truthound/plugins/security/sandbox/engines/__init__.py +19 -0
  383. truthound/plugins/security/sandbox/engines/container.py +379 -0
  384. truthound/plugins/security/sandbox/engines/noop.py +144 -0
  385. truthound/plugins/security/sandbox/engines/process.py +336 -0
  386. truthound/plugins/security/sandbox/factory.py +211 -0
  387. truthound/plugins/security/signing/__init__.py +57 -0
  388. truthound/plugins/security/signing/service.py +330 -0
  389. truthound/plugins/security/signing/trust_store.py +368 -0
  390. truthound/plugins/security/signing/verifier.py +459 -0
  391. truthound/plugins/versioning/__init__.py +41 -0
  392. truthound/plugins/versioning/constraints.py +297 -0
  393. truthound/plugins/versioning/resolver.py +329 -0
  394. truthound/profiler/__init__.py +1729 -0
  395. truthound/profiler/_lazy.py +452 -0
  396. truthound/profiler/ab_testing/__init__.py +80 -0
  397. truthound/profiler/ab_testing/analysis.py +449 -0
  398. truthound/profiler/ab_testing/base.py +257 -0
  399. truthound/profiler/ab_testing/experiment.py +395 -0
  400. truthound/profiler/ab_testing/tracking.py +368 -0
  401. truthound/profiler/auto_threshold.py +1170 -0
  402. truthound/profiler/base.py +579 -0
  403. truthound/profiler/cache_patterns.py +911 -0
  404. truthound/profiler/caching.py +1303 -0
  405. truthound/profiler/column_profiler.py +712 -0
  406. truthound/profiler/comparison.py +1007 -0
  407. truthound/profiler/custom_patterns.py +1170 -0
  408. truthound/profiler/dashboard/__init__.py +50 -0
  409. truthound/profiler/dashboard/app.py +476 -0
  410. truthound/profiler/dashboard/components.py +457 -0
  411. truthound/profiler/dashboard/config.py +72 -0
  412. truthound/profiler/distributed/__init__.py +83 -0
  413. truthound/profiler/distributed/base.py +281 -0
  414. truthound/profiler/distributed/dask_backend.py +498 -0
  415. truthound/profiler/distributed/local_backend.py +293 -0
  416. truthound/profiler/distributed/profiler.py +304 -0
  417. truthound/profiler/distributed/ray_backend.py +374 -0
  418. truthound/profiler/distributed/spark_backend.py +375 -0
  419. truthound/profiler/distributed.py +1366 -0
  420. truthound/profiler/enterprise_sampling.py +1065 -0
  421. truthound/profiler/errors.py +488 -0
  422. truthound/profiler/evolution/__init__.py +91 -0
  423. truthound/profiler/evolution/alerts.py +426 -0
  424. truthound/profiler/evolution/changes.py +206 -0
  425. truthound/profiler/evolution/compatibility.py +365 -0
  426. truthound/profiler/evolution/detector.py +372 -0
  427. truthound/profiler/evolution/protocols.py +121 -0
  428. truthound/profiler/generators/__init__.py +48 -0
  429. truthound/profiler/generators/base.py +384 -0
  430. truthound/profiler/generators/ml_rules.py +375 -0
  431. truthound/profiler/generators/pattern_rules.py +384 -0
  432. truthound/profiler/generators/schema_rules.py +267 -0
  433. truthound/profiler/generators/stats_rules.py +324 -0
  434. truthound/profiler/generators/suite_generator.py +857 -0
  435. truthound/profiler/i18n.py +1542 -0
  436. truthound/profiler/incremental.py +554 -0
  437. truthound/profiler/incremental_validation.py +1710 -0
  438. truthound/profiler/integration/__init__.py +73 -0
  439. truthound/profiler/integration/adapters.py +345 -0
  440. truthound/profiler/integration/context.py +371 -0
  441. truthound/profiler/integration/executor.py +527 -0
  442. truthound/profiler/integration/naming.py +75 -0
  443. truthound/profiler/integration/protocols.py +243 -0
  444. truthound/profiler/memory.py +1185 -0
  445. truthound/profiler/migration/__init__.py +60 -0
  446. truthound/profiler/migration/base.py +345 -0
  447. truthound/profiler/migration/manager.py +444 -0
  448. truthound/profiler/migration/v1_0_to_v1_1.py +484 -0
  449. truthound/profiler/ml/__init__.py +73 -0
  450. truthound/profiler/ml/base.py +244 -0
  451. truthound/profiler/ml/classifier.py +507 -0
  452. truthound/profiler/ml/feature_extraction.py +604 -0
  453. truthound/profiler/ml/pretrained.py +448 -0
  454. truthound/profiler/ml_inference.py +1276 -0
  455. truthound/profiler/native_patterns.py +815 -0
  456. truthound/profiler/observability.py +1184 -0
  457. truthound/profiler/process_timeout.py +1566 -0
  458. truthound/profiler/progress.py +568 -0
  459. truthound/profiler/progress_callbacks.py +1734 -0
  460. truthound/profiler/quality.py +1345 -0
  461. truthound/profiler/resilience.py +1180 -0
  462. truthound/profiler/sampled_matcher.py +794 -0
  463. truthound/profiler/sampling.py +1288 -0
  464. truthound/profiler/scheduling/__init__.py +82 -0
  465. truthound/profiler/scheduling/protocols.py +214 -0
  466. truthound/profiler/scheduling/scheduler.py +474 -0
  467. truthound/profiler/scheduling/storage.py +457 -0
  468. truthound/profiler/scheduling/triggers.py +449 -0
  469. truthound/profiler/schema.py +603 -0
  470. truthound/profiler/streaming.py +685 -0
  471. truthound/profiler/streaming_patterns.py +1354 -0
  472. truthound/profiler/suite_cli.py +625 -0
  473. truthound/profiler/suite_config.py +789 -0
  474. truthound/profiler/suite_export.py +1268 -0
  475. truthound/profiler/table_profiler.py +547 -0
  476. truthound/profiler/timeout.py +565 -0
  477. truthound/profiler/validation.py +1532 -0
  478. truthound/profiler/visualization/__init__.py +118 -0
  479. truthound/profiler/visualization/base.py +346 -0
  480. truthound/profiler/visualization/generator.py +1259 -0
  481. truthound/profiler/visualization/plotly_renderer.py +811 -0
  482. truthound/profiler/visualization/renderers.py +669 -0
  483. truthound/profiler/visualization/sections.py +540 -0
  484. truthound/profiler/visualization.py +2122 -0
  485. truthound/profiler/yaml_validation.py +1151 -0
  486. truthound/py.typed +0 -0
  487. truthound/ratelimit/__init__.py +248 -0
  488. truthound/ratelimit/algorithms.py +1108 -0
  489. truthound/ratelimit/core.py +573 -0
  490. truthound/ratelimit/integration.py +532 -0
  491. truthound/ratelimit/limiter.py +663 -0
  492. truthound/ratelimit/middleware.py +700 -0
  493. truthound/ratelimit/policy.py +792 -0
  494. truthound/ratelimit/storage.py +763 -0
  495. truthound/rbac/__init__.py +340 -0
  496. truthound/rbac/core.py +976 -0
  497. truthound/rbac/integration.py +760 -0
  498. truthound/rbac/manager.py +1052 -0
  499. truthound/rbac/middleware.py +842 -0
  500. truthound/rbac/policy.py +954 -0
  501. truthound/rbac/storage.py +878 -0
  502. truthound/realtime/__init__.py +141 -0
  503. truthound/realtime/adapters/__init__.py +43 -0
  504. truthound/realtime/adapters/base.py +533 -0
  505. truthound/realtime/adapters/kafka.py +487 -0
  506. truthound/realtime/adapters/kinesis.py +479 -0
  507. truthound/realtime/adapters/mock.py +243 -0
  508. truthound/realtime/base.py +553 -0
  509. truthound/realtime/factory.py +382 -0
  510. truthound/realtime/incremental.py +660 -0
  511. truthound/realtime/processing/__init__.py +67 -0
  512. truthound/realtime/processing/exactly_once.py +575 -0
  513. truthound/realtime/processing/state.py +547 -0
  514. truthound/realtime/processing/windows.py +647 -0
  515. truthound/realtime/protocols.py +569 -0
  516. truthound/realtime/streaming.py +605 -0
  517. truthound/realtime/testing/__init__.py +32 -0
  518. truthound/realtime/testing/containers.py +615 -0
  519. truthound/realtime/testing/fixtures.py +484 -0
  520. truthound/report.py +280 -0
  521. truthound/reporters/__init__.py +46 -0
  522. truthound/reporters/_protocols.py +30 -0
  523. truthound/reporters/base.py +324 -0
  524. truthound/reporters/ci/__init__.py +66 -0
  525. truthound/reporters/ci/azure.py +436 -0
  526. truthound/reporters/ci/base.py +509 -0
  527. truthound/reporters/ci/bitbucket.py +567 -0
  528. truthound/reporters/ci/circleci.py +547 -0
  529. truthound/reporters/ci/detection.py +364 -0
  530. truthound/reporters/ci/factory.py +182 -0
  531. truthound/reporters/ci/github.py +388 -0
  532. truthound/reporters/ci/gitlab.py +471 -0
  533. truthound/reporters/ci/jenkins.py +525 -0
  534. truthound/reporters/console_reporter.py +299 -0
  535. truthound/reporters/factory.py +211 -0
  536. truthound/reporters/html_reporter.py +524 -0
  537. truthound/reporters/json_reporter.py +256 -0
  538. truthound/reporters/markdown_reporter.py +280 -0
  539. truthound/reporters/sdk/__init__.py +174 -0
  540. truthound/reporters/sdk/builder.py +558 -0
  541. truthound/reporters/sdk/mixins.py +1150 -0
  542. truthound/reporters/sdk/schema.py +1493 -0
  543. truthound/reporters/sdk/templates.py +666 -0
  544. truthound/reporters/sdk/testing.py +968 -0
  545. truthound/scanners.py +170 -0
  546. truthound/scheduling/__init__.py +122 -0
  547. truthound/scheduling/cron.py +1136 -0
  548. truthound/scheduling/presets.py +212 -0
  549. truthound/schema.py +275 -0
  550. truthound/secrets/__init__.py +173 -0
  551. truthound/secrets/base.py +618 -0
  552. truthound/secrets/cloud.py +682 -0
  553. truthound/secrets/integration.py +507 -0
  554. truthound/secrets/manager.py +633 -0
  555. truthound/secrets/oidc/__init__.py +172 -0
  556. truthound/secrets/oidc/base.py +902 -0
  557. truthound/secrets/oidc/credential_provider.py +623 -0
  558. truthound/secrets/oidc/exchangers.py +1001 -0
  559. truthound/secrets/oidc/github/__init__.py +110 -0
  560. truthound/secrets/oidc/github/claims.py +718 -0
  561. truthound/secrets/oidc/github/enhanced_provider.py +693 -0
  562. truthound/secrets/oidc/github/trust_policy.py +742 -0
  563. truthound/secrets/oidc/github/verification.py +723 -0
  564. truthound/secrets/oidc/github/workflow.py +691 -0
  565. truthound/secrets/oidc/providers.py +825 -0
  566. truthound/secrets/providers.py +506 -0
  567. truthound/secrets/resolver.py +495 -0
  568. truthound/stores/__init__.py +177 -0
  569. truthound/stores/backends/__init__.py +18 -0
  570. truthound/stores/backends/_protocols.py +340 -0
  571. truthound/stores/backends/azure_blob.py +530 -0
  572. truthound/stores/backends/concurrent_filesystem.py +915 -0
  573. truthound/stores/backends/connection_pool.py +1365 -0
  574. truthound/stores/backends/database.py +743 -0
  575. truthound/stores/backends/filesystem.py +538 -0
  576. truthound/stores/backends/gcs.py +399 -0
  577. truthound/stores/backends/memory.py +354 -0
  578. truthound/stores/backends/s3.py +434 -0
  579. truthound/stores/backpressure/__init__.py +84 -0
  580. truthound/stores/backpressure/base.py +375 -0
  581. truthound/stores/backpressure/circuit_breaker.py +434 -0
  582. truthound/stores/backpressure/monitor.py +376 -0
  583. truthound/stores/backpressure/strategies.py +677 -0
  584. truthound/stores/base.py +551 -0
  585. truthound/stores/batching/__init__.py +65 -0
  586. truthound/stores/batching/base.py +305 -0
  587. truthound/stores/batching/buffer.py +370 -0
  588. truthound/stores/batching/store.py +248 -0
  589. truthound/stores/batching/writer.py +521 -0
  590. truthound/stores/caching/__init__.py +60 -0
  591. truthound/stores/caching/backends.py +684 -0
  592. truthound/stores/caching/base.py +356 -0
  593. truthound/stores/caching/store.py +305 -0
  594. truthound/stores/compression/__init__.py +193 -0
  595. truthound/stores/compression/adaptive.py +694 -0
  596. truthound/stores/compression/base.py +514 -0
  597. truthound/stores/compression/pipeline.py +868 -0
  598. truthound/stores/compression/providers.py +672 -0
  599. truthound/stores/compression/streaming.py +832 -0
  600. truthound/stores/concurrency/__init__.py +81 -0
  601. truthound/stores/concurrency/atomic.py +556 -0
  602. truthound/stores/concurrency/index.py +775 -0
  603. truthound/stores/concurrency/locks.py +576 -0
  604. truthound/stores/concurrency/manager.py +482 -0
  605. truthound/stores/encryption/__init__.py +297 -0
  606. truthound/stores/encryption/base.py +952 -0
  607. truthound/stores/encryption/keys.py +1191 -0
  608. truthound/stores/encryption/pipeline.py +903 -0
  609. truthound/stores/encryption/providers.py +953 -0
  610. truthound/stores/encryption/streaming.py +950 -0
  611. truthound/stores/expectations.py +227 -0
  612. truthound/stores/factory.py +246 -0
  613. truthound/stores/migration/__init__.py +75 -0
  614. truthound/stores/migration/base.py +480 -0
  615. truthound/stores/migration/manager.py +347 -0
  616. truthound/stores/migration/registry.py +382 -0
  617. truthound/stores/migration/store.py +559 -0
  618. truthound/stores/observability/__init__.py +106 -0
  619. truthound/stores/observability/audit.py +718 -0
  620. truthound/stores/observability/config.py +270 -0
  621. truthound/stores/observability/factory.py +208 -0
  622. truthound/stores/observability/metrics.py +636 -0
  623. truthound/stores/observability/protocols.py +410 -0
  624. truthound/stores/observability/store.py +570 -0
  625. truthound/stores/observability/tracing.py +784 -0
  626. truthound/stores/replication/__init__.py +76 -0
  627. truthound/stores/replication/base.py +260 -0
  628. truthound/stores/replication/monitor.py +269 -0
  629. truthound/stores/replication/store.py +439 -0
  630. truthound/stores/replication/syncer.py +391 -0
  631. truthound/stores/results.py +359 -0
  632. truthound/stores/retention/__init__.py +77 -0
  633. truthound/stores/retention/base.py +378 -0
  634. truthound/stores/retention/policies.py +621 -0
  635. truthound/stores/retention/scheduler.py +279 -0
  636. truthound/stores/retention/store.py +526 -0
  637. truthound/stores/streaming/__init__.py +138 -0
  638. truthound/stores/streaming/base.py +801 -0
  639. truthound/stores/streaming/database.py +984 -0
  640. truthound/stores/streaming/filesystem.py +719 -0
  641. truthound/stores/streaming/reader.py +629 -0
  642. truthound/stores/streaming/s3.py +843 -0
  643. truthound/stores/streaming/writer.py +790 -0
  644. truthound/stores/tiering/__init__.py +108 -0
  645. truthound/stores/tiering/base.py +462 -0
  646. truthound/stores/tiering/manager.py +249 -0
  647. truthound/stores/tiering/policies.py +692 -0
  648. truthound/stores/tiering/store.py +526 -0
  649. truthound/stores/versioning/__init__.py +56 -0
  650. truthound/stores/versioning/base.py +376 -0
  651. truthound/stores/versioning/store.py +660 -0
  652. truthound/stores/versioning/strategies.py +353 -0
  653. truthound/types.py +56 -0
  654. truthound/validators/__init__.py +774 -0
  655. truthound/validators/aggregate/__init__.py +27 -0
  656. truthound/validators/aggregate/central.py +116 -0
  657. truthound/validators/aggregate/extremes.py +116 -0
  658. truthound/validators/aggregate/spread.py +118 -0
  659. truthound/validators/aggregate/sum.py +64 -0
  660. truthound/validators/aggregate/type.py +78 -0
  661. truthound/validators/anomaly/__init__.py +93 -0
  662. truthound/validators/anomaly/base.py +431 -0
  663. truthound/validators/anomaly/ml_based.py +1190 -0
  664. truthound/validators/anomaly/multivariate.py +647 -0
  665. truthound/validators/anomaly/statistical.py +599 -0
  666. truthound/validators/base.py +1089 -0
  667. truthound/validators/business_rule/__init__.py +46 -0
  668. truthound/validators/business_rule/base.py +147 -0
  669. truthound/validators/business_rule/checksum.py +509 -0
  670. truthound/validators/business_rule/financial.py +526 -0
  671. truthound/validators/cache.py +733 -0
  672. truthound/validators/completeness/__init__.py +39 -0
  673. truthound/validators/completeness/conditional.py +73 -0
  674. truthound/validators/completeness/default.py +98 -0
  675. truthound/validators/completeness/empty.py +103 -0
  676. truthound/validators/completeness/nan.py +337 -0
  677. truthound/validators/completeness/null.py +152 -0
  678. truthound/validators/cross_table/__init__.py +17 -0
  679. truthound/validators/cross_table/aggregate.py +333 -0
  680. truthound/validators/cross_table/row_count.py +122 -0
  681. truthound/validators/datetime/__init__.py +29 -0
  682. truthound/validators/datetime/format.py +78 -0
  683. truthound/validators/datetime/freshness.py +269 -0
  684. truthound/validators/datetime/order.py +73 -0
  685. truthound/validators/datetime/parseable.py +185 -0
  686. truthound/validators/datetime/range.py +202 -0
  687. truthound/validators/datetime/timezone.py +69 -0
  688. truthound/validators/distribution/__init__.py +49 -0
  689. truthound/validators/distribution/distribution.py +128 -0
  690. truthound/validators/distribution/monotonic.py +119 -0
  691. truthound/validators/distribution/outlier.py +178 -0
  692. truthound/validators/distribution/quantile.py +80 -0
  693. truthound/validators/distribution/range.py +254 -0
  694. truthound/validators/distribution/set.py +125 -0
  695. truthound/validators/distribution/statistical.py +459 -0
  696. truthound/validators/drift/__init__.py +79 -0
  697. truthound/validators/drift/base.py +427 -0
  698. truthound/validators/drift/multi_feature.py +401 -0
  699. truthound/validators/drift/numeric.py +395 -0
  700. truthound/validators/drift/psi.py +446 -0
  701. truthound/validators/drift/statistical.py +510 -0
  702. truthound/validators/enterprise.py +1658 -0
  703. truthound/validators/geospatial/__init__.py +80 -0
  704. truthound/validators/geospatial/base.py +97 -0
  705. truthound/validators/geospatial/boundary.py +238 -0
  706. truthound/validators/geospatial/coordinate.py +351 -0
  707. truthound/validators/geospatial/distance.py +399 -0
  708. truthound/validators/geospatial/polygon.py +665 -0
  709. truthound/validators/i18n/__init__.py +308 -0
  710. truthound/validators/i18n/bidi.py +571 -0
  711. truthound/validators/i18n/catalogs.py +570 -0
  712. truthound/validators/i18n/dialects.py +763 -0
  713. truthound/validators/i18n/extended_catalogs.py +549 -0
  714. truthound/validators/i18n/formatting.py +1434 -0
  715. truthound/validators/i18n/loader.py +1020 -0
  716. truthound/validators/i18n/messages.py +521 -0
  717. truthound/validators/i18n/plural.py +683 -0
  718. truthound/validators/i18n/protocols.py +855 -0
  719. truthound/validators/i18n/tms.py +1162 -0
  720. truthound/validators/localization/__init__.py +53 -0
  721. truthound/validators/localization/base.py +122 -0
  722. truthound/validators/localization/chinese.py +362 -0
  723. truthound/validators/localization/japanese.py +275 -0
  724. truthound/validators/localization/korean.py +524 -0
  725. truthound/validators/memory/__init__.py +94 -0
  726. truthound/validators/memory/approximate_knn.py +506 -0
  727. truthound/validators/memory/base.py +547 -0
  728. truthound/validators/memory/sgd_online.py +719 -0
  729. truthound/validators/memory/streaming_ecdf.py +753 -0
  730. truthound/validators/ml_feature/__init__.py +54 -0
  731. truthound/validators/ml_feature/base.py +249 -0
  732. truthound/validators/ml_feature/correlation.py +299 -0
  733. truthound/validators/ml_feature/leakage.py +344 -0
  734. truthound/validators/ml_feature/null_impact.py +270 -0
  735. truthound/validators/ml_feature/scale.py +264 -0
  736. truthound/validators/multi_column/__init__.py +89 -0
  737. truthound/validators/multi_column/arithmetic.py +284 -0
  738. truthound/validators/multi_column/base.py +231 -0
  739. truthound/validators/multi_column/comparison.py +273 -0
  740. truthound/validators/multi_column/consistency.py +312 -0
  741. truthound/validators/multi_column/statistical.py +299 -0
  742. truthound/validators/optimization/__init__.py +164 -0
  743. truthound/validators/optimization/aggregation.py +563 -0
  744. truthound/validators/optimization/covariance.py +556 -0
  745. truthound/validators/optimization/geo.py +626 -0
  746. truthound/validators/optimization/graph.py +587 -0
  747. truthound/validators/optimization/orchestrator.py +970 -0
  748. truthound/validators/optimization/profiling.py +1312 -0
  749. truthound/validators/privacy/__init__.py +223 -0
  750. truthound/validators/privacy/base.py +635 -0
  751. truthound/validators/privacy/ccpa.py +670 -0
  752. truthound/validators/privacy/gdpr.py +728 -0
  753. truthound/validators/privacy/global_patterns.py +604 -0
  754. truthound/validators/privacy/plugins.py +867 -0
  755. truthound/validators/profiling/__init__.py +52 -0
  756. truthound/validators/profiling/base.py +175 -0
  757. truthound/validators/profiling/cardinality.py +312 -0
  758. truthound/validators/profiling/entropy.py +391 -0
  759. truthound/validators/profiling/frequency.py +455 -0
  760. truthound/validators/pushdown_support.py +660 -0
  761. truthound/validators/query/__init__.py +91 -0
  762. truthound/validators/query/aggregate.py +346 -0
  763. truthound/validators/query/base.py +246 -0
  764. truthound/validators/query/column.py +249 -0
  765. truthound/validators/query/expression.py +274 -0
  766. truthound/validators/query/result.py +323 -0
  767. truthound/validators/query/row_count.py +264 -0
  768. truthound/validators/referential/__init__.py +80 -0
  769. truthound/validators/referential/base.py +395 -0
  770. truthound/validators/referential/cascade.py +391 -0
  771. truthound/validators/referential/circular.py +563 -0
  772. truthound/validators/referential/foreign_key.py +624 -0
  773. truthound/validators/referential/orphan.py +485 -0
  774. truthound/validators/registry.py +112 -0
  775. truthound/validators/schema/__init__.py +41 -0
  776. truthound/validators/schema/column_count.py +142 -0
  777. truthound/validators/schema/column_exists.py +80 -0
  778. truthound/validators/schema/column_order.py +82 -0
  779. truthound/validators/schema/column_pair.py +85 -0
  780. truthound/validators/schema/column_pair_set.py +195 -0
  781. truthound/validators/schema/column_type.py +94 -0
  782. truthound/validators/schema/multi_column.py +53 -0
  783. truthound/validators/schema/multi_column_aggregate.py +175 -0
  784. truthound/validators/schema/referential.py +274 -0
  785. truthound/validators/schema/table_schema.py +91 -0
  786. truthound/validators/schema_validator.py +219 -0
  787. truthound/validators/sdk/__init__.py +250 -0
  788. truthound/validators/sdk/builder.py +680 -0
  789. truthound/validators/sdk/decorators.py +474 -0
  790. truthound/validators/sdk/enterprise/__init__.py +211 -0
  791. truthound/validators/sdk/enterprise/docs.py +725 -0
  792. truthound/validators/sdk/enterprise/fuzzing.py +659 -0
  793. truthound/validators/sdk/enterprise/licensing.py +709 -0
  794. truthound/validators/sdk/enterprise/manager.py +543 -0
  795. truthound/validators/sdk/enterprise/resources.py +628 -0
  796. truthound/validators/sdk/enterprise/sandbox.py +766 -0
  797. truthound/validators/sdk/enterprise/signing.py +603 -0
  798. truthound/validators/sdk/enterprise/templates.py +865 -0
  799. truthound/validators/sdk/enterprise/versioning.py +659 -0
  800. truthound/validators/sdk/templates.py +757 -0
  801. truthound/validators/sdk/testing.py +807 -0
  802. truthound/validators/security/__init__.py +181 -0
  803. truthound/validators/security/redos/__init__.py +182 -0
  804. truthound/validators/security/redos/core.py +861 -0
  805. truthound/validators/security/redos/cpu_monitor.py +593 -0
  806. truthound/validators/security/redos/cve_database.py +791 -0
  807. truthound/validators/security/redos/ml/__init__.py +155 -0
  808. truthound/validators/security/redos/ml/base.py +785 -0
  809. truthound/validators/security/redos/ml/datasets.py +618 -0
  810. truthound/validators/security/redos/ml/features.py +359 -0
  811. truthound/validators/security/redos/ml/models.py +1000 -0
  812. truthound/validators/security/redos/ml/predictor.py +507 -0
  813. truthound/validators/security/redos/ml/storage.py +632 -0
  814. truthound/validators/security/redos/ml/training.py +571 -0
  815. truthound/validators/security/redos/ml_analyzer.py +937 -0
  816. truthound/validators/security/redos/optimizer.py +674 -0
  817. truthound/validators/security/redos/profiler.py +682 -0
  818. truthound/validators/security/redos/re2_engine.py +709 -0
  819. truthound/validators/security/redos.py +886 -0
  820. truthound/validators/security/sql_security.py +1247 -0
  821. truthound/validators/streaming/__init__.py +126 -0
  822. truthound/validators/streaming/base.py +292 -0
  823. truthound/validators/streaming/completeness.py +210 -0
  824. truthound/validators/streaming/mixin.py +575 -0
  825. truthound/validators/streaming/range.py +308 -0
  826. truthound/validators/streaming/sources.py +846 -0
  827. truthound/validators/string/__init__.py +57 -0
  828. truthound/validators/string/casing.py +158 -0
  829. truthound/validators/string/charset.py +96 -0
  830. truthound/validators/string/format.py +501 -0
  831. truthound/validators/string/json.py +77 -0
  832. truthound/validators/string/json_schema.py +184 -0
  833. truthound/validators/string/length.py +104 -0
  834. truthound/validators/string/like_pattern.py +237 -0
  835. truthound/validators/string/regex.py +202 -0
  836. truthound/validators/string/regex_extended.py +435 -0
  837. truthound/validators/table/__init__.py +88 -0
  838. truthound/validators/table/base.py +78 -0
  839. truthound/validators/table/column_count.py +198 -0
  840. truthound/validators/table/freshness.py +362 -0
  841. truthound/validators/table/row_count.py +251 -0
  842. truthound/validators/table/schema.py +333 -0
  843. truthound/validators/table/size.py +285 -0
  844. truthound/validators/timeout/__init__.py +102 -0
  845. truthound/validators/timeout/advanced/__init__.py +247 -0
  846. truthound/validators/timeout/advanced/circuit_breaker.py +675 -0
  847. truthound/validators/timeout/advanced/prediction.py +773 -0
  848. truthound/validators/timeout/advanced/priority.py +618 -0
  849. truthound/validators/timeout/advanced/redis_backend.py +770 -0
  850. truthound/validators/timeout/advanced/retry.py +721 -0
  851. truthound/validators/timeout/advanced/sampling.py +788 -0
  852. truthound/validators/timeout/advanced/sla.py +661 -0
  853. truthound/validators/timeout/advanced/telemetry.py +804 -0
  854. truthound/validators/timeout/cascade.py +477 -0
  855. truthound/validators/timeout/deadline.py +657 -0
  856. truthound/validators/timeout/degradation.py +525 -0
  857. truthound/validators/timeout/distributed.py +597 -0
  858. truthound/validators/timeseries/__init__.py +89 -0
  859. truthound/validators/timeseries/base.py +326 -0
  860. truthound/validators/timeseries/completeness.py +617 -0
  861. truthound/validators/timeseries/gap.py +485 -0
  862. truthound/validators/timeseries/monotonic.py +310 -0
  863. truthound/validators/timeseries/seasonality.py +422 -0
  864. truthound/validators/timeseries/trend.py +510 -0
  865. truthound/validators/uniqueness/__init__.py +59 -0
  866. truthound/validators/uniqueness/approximate.py +475 -0
  867. truthound/validators/uniqueness/distinct_values.py +253 -0
  868. truthound/validators/uniqueness/duplicate.py +118 -0
  869. truthound/validators/uniqueness/primary_key.py +140 -0
  870. truthound/validators/uniqueness/unique.py +191 -0
  871. truthound/validators/uniqueness/within_record.py +599 -0
  872. truthound/validators/utils.py +756 -0
  873. truthound-1.0.8.dist-info/METADATA +474 -0
  874. truthound-1.0.8.dist-info/RECORD +877 -0
  875. truthound-1.0.8.dist-info/WHEEL +4 -0
  876. truthound-1.0.8.dist-info/entry_points.txt +2 -0
  877. truthound-1.0.8.dist-info/licenses/LICENSE +190 -0
@@ -0,0 +1,1566 @@
1
+ """Process-isolated timeout execution for reliable operation termination.
2
+
3
+ This module provides a robust timeout system that can reliably terminate
4
+ operations including native Rust code (like Polars) by using process isolation.
5
+
6
+ Key features:
7
+ - Process-based execution for reliable termination
8
+ - Pre-execution complexity estimation
9
+ - Circuit breaker pattern for repeated failures
10
+ - Resource monitoring (memory, CPU)
11
+ - Graceful degradation with multiple strategies
12
+
13
+ Problem Solved:
14
+ - Python threading cannot interrupt Polars Rust operations
15
+ - Process isolation ensures SIGTERM/SIGKILL always works
16
+
17
+ Design Principles:
18
+ - Strategy Pattern: Multiple execution backends (thread, process, async)
19
+ - Circuit Breaker: Prevent cascade failures
20
+ - Bulkhead: Isolate resources per operation
21
+ - Fail-Fast: Pre-check before expensive operations
22
+
23
+ Example:
24
+ from truthound.profiler.process_timeout import (
25
+ ProcessTimeoutExecutor,
26
+ TimeoutConfig,
27
+ with_process_timeout,
28
+ )
29
+
30
+ # Basic usage
31
+ executor = ProcessTimeoutExecutor()
32
+ result = executor.execute(
33
+ expensive_polars_operation,
34
+ timeout_seconds=30,
35
+ )
36
+
37
+ # With complexity estimation
38
+ result = executor.execute_with_estimation(
39
+ operation,
40
+ data_size=1_000_000,
41
+ timeout_seconds=60,
42
+ )
43
+ """
44
+
45
+ from __future__ import annotations
46
+
47
+ import functools
48
+ import hashlib
49
+ import logging
50
+ import multiprocessing as mp
51
+ import os
52
+ import pickle
53
+ import queue
54
+ import signal
55
+ import sys
56
+ import threading
57
+ import time
58
+ import traceback
59
+ from abc import ABC, abstractmethod
60
+ from concurrent.futures import (
61
+ Future,
62
+ ProcessPoolExecutor,
63
+ ThreadPoolExecutor,
64
+ TimeoutError as FuturesTimeoutError,
65
+ )
66
+ from contextlib import contextmanager
67
+ from dataclasses import dataclass, field
68
+ from datetime import datetime, timedelta
69
+ from enum import Enum
70
+ from multiprocessing import Queue
71
+ from typing import (
72
+ Any,
73
+ Callable,
74
+ Generic,
75
+ Generator,
76
+ Protocol,
77
+ TypeVar,
78
+ )
79
+
80
+ logger = logging.getLogger(__name__)
81
+
82
+
83
+ # =============================================================================
84
+ # Types
85
+ # =============================================================================
86
+
87
+ T = TypeVar("T")
88
+ R = TypeVar("R")
89
+
90
+
91
class ExecutionBackend(str, Enum):
    """Strategies under which an operation can be executed.

    Members also subclass ``str``, so each one compares equal to its raw
    string value.
    """

    # Thread-based: fast, but can't interrupt native code.
    THREAD = "thread"
    # Process-based: reliable, but has serialization overhead.
    PROCESS = "process"
    # Auto-select a backend based on the operation type.
    ADAPTIVE = "adaptive"
    # No isolation at all (for debugging).
    INLINE = "inline"
98
+
99
+
100
class TimeoutAction(str, Enum):
    """What to do once an operation has exceeded its timeout.

    Members also subclass ``str``, so each one compares equal to its raw
    string value.
    """

    # Skip the operation and continue.
    SKIP = "skip"
    # Return whatever partial results exist.
    PARTIAL = "partial"
    # Raise an exception.
    FAIL = "fail"
    # Retry with an extended timeout.
    RETRY = "retry"
    # Open the circuit breaker.
    CIRCUIT_BREAK = "circuit_break"
108
+
109
+
110
class TerminationMethod(str, Enum):
    """Ways a running process can be brought to a stop.

    Members also subclass ``str``, so each one compares equal to its raw
    string value.
    """

    # SIGTERM, wait, then SIGKILL.
    GRACEFUL = "graceful"
    # SIGKILL directly.
    IMMEDIATE = "immediate"
    # Set a flag and wait for the operation to stop on its own.
    COOPERATIVE = "cooperative"
116
+
117
+
118
class CircuitState(str, Enum):
    """The three states a circuit breaker can be in.

    Members also subclass ``str``, so each one compares equal to its raw
    string value.
    """

    # Normal operation.
    CLOSED = "closed"
    # Rejecting requests.
    OPEN = "open"
    # Testing whether the protected operation has recovered.
    HALF_OPEN = "half_open"
124
+
125
+
126
+ # =============================================================================
127
+ # Execution Result
128
+ # =============================================================================
129
+
130
+
131
@dataclass
class ExecutionMetrics:
    """Bookkeeping collected for a single execution attempt.

    ``started_at`` is stamped when the instance is created; calling
    ``complete()`` stamps ``completed_at`` and derives ``elapsed_seconds``
    from the difference between the two.
    """

    # Moment the attempt began (datetime.now() at construction time).
    started_at: datetime = field(default_factory=datetime.now)
    # Moment the attempt finished; stays None until complete() is called.
    completed_at: datetime | None = None
    # Seconds between started_at and completed_at, filled in by complete().
    elapsed_seconds: float = 0.0
    # Peak memory, in megabytes per the field name; not set by this class.
    peak_memory_mb: float = 0.0
    # CPU time, in seconds per the field name; not set by this class.
    cpu_time_seconds: float = 0.0
    # Backend the attempt ran on.
    backend_used: ExecutionBackend = ExecutionBackend.THREAD
    # Whether the attempt was forcibly terminated.
    was_terminated: bool = False
    # How the attempt was terminated, if it was.
    termination_method: TerminationMethod | None = None
    # Number of retries recorded for the attempt.
    retries: int = 0

    def complete(self) -> None:
        """Stamp the finish time and compute the elapsed duration."""
        finished = datetime.now()
        self.completed_at = finished
        self.elapsed_seconds = (finished - self.started_at).total_seconds()
149
+
150
+
151
@dataclass
class ExecutionResult(Generic[T]):
    """Outcome of running an operation under a timeout.

    Attributes:
        success: Whether operation completed successfully
        value: Result value if successful
        error: Exception if failed
        timed_out: Whether operation was terminated due to timeout
        metrics: Execution metrics
        partial_result: Partial result if available
    """

    success: bool
    value: T | None = None
    error: Exception | None = None
    timed_out: bool = False
    metrics: ExecutionMetrics = field(default_factory=ExecutionMetrics)
    partial_result: Any = None

    @classmethod
    def ok(cls, value: T, metrics: ExecutionMetrics | None = None) -> "ExecutionResult[T]":
        """Build a successful result carrying ``value``.

        Args:
            value: The operation's return value.
            metrics: Metrics to attach; a fresh instance is created when
                omitted. The metrics are marked complete either way.

        Returns:
            A result with ``success=True``.
        """
        final_metrics = metrics if metrics else ExecutionMetrics()
        final_metrics.complete()
        return cls(success=True, value=value, metrics=final_metrics)

    @classmethod
    def timeout(
        cls,
        metrics: ExecutionMetrics | None = None,
        partial: Any = None,
    ) -> "ExecutionResult[T]":
        """Build a result for an operation that ran out of time.

        Args:
            metrics: Metrics to attach; a fresh instance is created when
                omitted. The metrics are marked complete and flagged as
                terminated.
            partial: Any partial result to hand back to the caller.

        Returns:
            A result with ``success=False`` and ``timed_out=True``.
        """
        final_metrics = metrics if metrics else ExecutionMetrics()
        final_metrics.was_terminated = True
        final_metrics.complete()
        return cls(
            success=False,
            timed_out=True,
            metrics=final_metrics,
            partial_result=partial,
        )

    @classmethod
    def failure(
        cls,
        error: Exception,
        metrics: ExecutionMetrics | None = None,
    ) -> "ExecutionResult[T]":
        """Build a result for an operation that raised.

        Args:
            error: The exception describing the failure.
            metrics: Metrics to attach; a fresh instance is created when
                omitted. The metrics are marked complete either way.

        Returns:
            A result with ``success=False`` and ``error`` set.
        """
        final_metrics = metrics if metrics else ExecutionMetrics()
        final_metrics.complete()
        return cls(success=False, error=error, metrics=final_metrics)

    def to_dict(self) -> dict[str, Any]:
        """Return a flat summary dictionary of this result.

        The result value and partial result are not included; the error is
        rendered with ``str()`` (or ``None`` when there is no error).
        """
        recorded = self.metrics
        error_text = str(self.error) if self.error else None
        summary: dict[str, Any] = {}
        summary["success"] = self.success
        summary["timed_out"] = self.timed_out
        summary["elapsed_seconds"] = recorded.elapsed_seconds
        summary["backend_used"] = recorded.backend_used.value
        summary["was_terminated"] = recorded.was_terminated
        summary["error"] = error_text
        return summary
211
+
212
+
213
+ # =============================================================================
214
+ # Complexity Estimator
215
+ # =============================================================================
216
+
217
+
218
@dataclass
class ComplexityEstimate:
    """Predicted cost of running an operation.

    Attributes:
        estimated_time_seconds: Expected execution time
        estimated_memory_mb: Expected memory usage
        confidence: Confidence in the estimate (0-1)
        recommendation: Recommended execution backend
        should_sample: Whether to sample data first
        max_safe_rows: Maximum rows to process safely
        risk_level: Free-form risk label; defaults to "unknown"
    """

    estimated_time_seconds: float
    estimated_memory_mb: float
    confidence: float = 0.5
    recommendation: ExecutionBackend = ExecutionBackend.ADAPTIVE
    should_sample: bool = False
    max_safe_rows: int = 0
    risk_level: str = "unknown"

    def exceeds_timeout(self, timeout_seconds: float) -> bool:
        """Return True when the padded time estimate is above ``timeout_seconds``.

        The raw estimate is multiplied by ``2 - confidence``, so lower
        confidence inflates it (x1 at confidence 1.0, x2 at confidence 0.0).
        """
        return self.estimated_time_seconds * (2 - self.confidence) > timeout_seconds

    def exceeds_memory(self, max_memory_mb: float) -> bool:
        """Return True when the padded memory estimate is above ``max_memory_mb``.

        Uses the same ``2 - confidence`` inflation as ``exceeds_timeout``.
        """
        return self.estimated_memory_mb * (2 - self.confidence) > max_memory_mb
249
+
250
+
251
class ComplexityEstimator(Protocol):
    """Structural interface for objects that can estimate operation cost."""

    def estimate(
        self,
        operation_type: str,
        data_size: int,
        column_count: int = 1,
        **kwargs: Any,
    ) -> ComplexityEstimate:
        """Return a complexity estimate for the described operation.

        Args:
            operation_type: Name of the kind of operation.
            data_size: Size of the input data.
            column_count: Number of columns involved.
            **kwargs: Implementation-specific hints.
        """
        ...
263
+
264
+
265
class DefaultComplexityEstimator:
    """Heuristic complexity estimator.

    Estimates are derived from fixed per-operation throughput and
    memory-per-row tables, scaled by the data size, column count and a
    configurable safety factor.
    """

    # Throughput table: rows per second for each known operation type.
    OPERATION_SPEEDS: dict[str, float] = {
        "profile_column": 100_000,  # 100K rows/sec
        "pattern_match": 50_000,  # 50K rows/sec
        "distribution": 200_000,  # 200K rows/sec
        "correlation": 10_000,  # 10K rows/sec (O(n²))
        "unique_count": 150_000,  # 150K rows/sec
        "null_check": 500_000,  # 500K rows/sec
        "type_inference": 75_000,  # 75K rows/sec
        "default": 100_000,  # Fallback for unknown operation types
    }

    # Memory table: bytes per row for each known operation type.
    MEMORY_PER_ROW: dict[str, float] = {
        "profile_column": 100,  # 100 bytes/row
        "pattern_match": 200,  # 200 bytes/row (regex buffers)
        "distribution": 50,  # 50 bytes/row
        "correlation": 300,  # 300 bytes/row (matrix)
        "unique_count": 150,  # 150 bytes/row (hash set)
        "default": 100,  # Fallback for unknown operation types
    }

    def __init__(self, safety_factor: float = 1.5):
        """Create an estimator.

        Args:
            safety_factor: Multiplier applied to time estimates to keep
                them conservative.
        """
        # (operation_type, data_size, actual_seconds) records kept for calibration.
        self._history: list[tuple[str, int, float]] = []
        self.safety_factor = safety_factor

    def estimate(
        self,
        operation_type: str,
        data_size: int,
        column_count: int = 1,
        **kwargs: Any,
    ) -> ComplexityEstimate:
        """Estimate time, memory and risk for an operation.

        Args:
            operation_type: Type of operation; unknown types use the
                "default" table entries.
            data_size: Number of rows
            column_count: Number of columns
            **kwargs: Additional hints (not used by this implementation)

        Returns:
            Complexity estimate
        """
        speeds = self.OPERATION_SPEEDS
        footprints = self.MEMORY_PER_ROW
        speed = speeds.get(operation_type, speeds["default"])
        memory_per_row = footprints.get(operation_type, footprints["default"])

        # Time in seconds (padded by the safety factor) and memory in MB.
        time_estimate = (data_size / speed) * column_count * self.safety_factor
        memory_estimate = (data_size * memory_per_row * column_count) / (1024 * 1024)

        # Confidence drops as the data grows: first bound the size is under wins.
        for upper_bound, level in ((10_000, 0.9), (100_000, 0.7), (1_000_000, 0.5)):
            if data_size < upper_bound:
                confidence = level
                break
        else:
            confidence = 0.3  # Very large data

        # Pick backend / sampling / risk from the time and memory thresholds.
        if time_estimate > 60 or memory_estimate > 1000:
            recommendation, should_sample, risk_level = (
                ExecutionBackend.PROCESS,
                True,
                "high",
            )
        elif time_estimate > 10 or memory_estimate > 500:
            recommendation, should_sample, risk_level = (
                ExecutionBackend.PROCESS,
                False,
                "medium",
            )
        else:
            recommendation, should_sample, risk_level = (
                ExecutionBackend.THREAD,
                False,
                "low",
            )

        # Rows processed in 30 seconds at the table throughput.
        # NOTE(review): this ignores column_count and safety_factor, unlike
        # time_estimate above — confirm that is intended.
        max_safe_rows = int(speed * 30)

        return ComplexityEstimate(
            estimated_time_seconds=time_estimate,
            estimated_memory_mb=memory_estimate,
            confidence=confidence,
            recommendation=recommendation,
            should_sample=should_sample,
            max_safe_rows=max_safe_rows,
            risk_level=risk_level,
        )

    def record_actual(
        self,
        operation_type: str,
        data_size: int,
        actual_seconds: float,
    ) -> None:
        """Remember an observed execution time for future calibration.

        Only the 100 most recent records are retained.

        Args:
            operation_type: Type of operation
            data_size: Number of rows
            actual_seconds: Actual execution time
        """
        self._history.append((operation_type, data_size, actual_seconds))

        # Drop the oldest entries once more than 100 have accumulated.
        overflow = len(self._history) - 100
        if overflow > 0:
            self._history = self._history[overflow:]


# Shared module-level estimator instance
default_complexity_estimator = DefaultComplexityEstimator()
393
+
394
+
395
+ # =============================================================================
396
+ # Circuit Breaker
397
+ # =============================================================================
398
+
399
+
400
@dataclass
class CircuitBreakerConfig:
    """Tunable thresholds for a circuit breaker.

    Attributes:
        failure_threshold: Failures before opening circuit
        success_threshold: Successes before closing from half-open
        timeout_seconds: Time before trying half-open from open
        half_open_max_calls: Max calls in half-open state
    """

    # Failures tolerated before the circuit opens.
    failure_threshold: int = 5
    # Successes required in half-open before the circuit closes again.
    success_threshold: int = 2
    # Seconds the circuit stays open before half-open is attempted.
    timeout_seconds: float = 60.0
    # Upper bound on calls admitted while half-open.
    half_open_max_calls: int = 3
415
+
416
+
417
+ class CircuitBreaker:
418
+ """Circuit breaker for preventing cascade failures.
419
+
420
+ Implements the circuit breaker pattern:
421
+ - CLOSED: Normal operation, track failures
422
+ - OPEN: Reject all requests, wait for timeout
423
+ - HALF_OPEN: Allow limited requests to test recovery
424
+
425
+ Example:
426
+ breaker = CircuitBreaker()
427
+
428
+ if breaker.can_execute():
429
+ try:
430
+ result = operation()
431
+ breaker.record_success()
432
+ except Exception:
433
+ breaker.record_failure()
434
+ raise
435
+ else:
436
+ raise CircuitOpenError("Circuit is open")
437
+ """
438
+
439
def __init__(
    self,
    name: str = "default",
    config: CircuitBreakerConfig | None = None,
):
    """Set up a breaker that starts in the CLOSED state.

    Args:
        name: Identifier for this breaker (used in log messages)
        config: Thresholds to use; defaults to ``CircuitBreakerConfig()``
    """
    self.name = name
    self.config = config if config else CircuitBreakerConfig()

    # Lock held by the public methods while they read or update state.
    self._lock = threading.RLock()

    # Current state plus the counters that drive transitions.
    self._state = CircuitState.CLOSED
    self._failure_count = 0
    self._success_count = 0
    self._half_open_calls = 0
    # time.time() of the most recent recorded failure, if any.
    self._last_failure_time: float | None = None
458
+
459
@property
def state(self) -> CircuitState:
    """Return the current state, applying any due transition first."""
    with self._lock:
        # Reading the state can itself move OPEN -> HALF_OPEN once the
        # configured open period has elapsed.
        self._check_state_transition()
        current = self._state
    return current
465
+
466
@property
def is_closed(self) -> bool:
    """Return True while the circuit is closed (normal operation)."""
    current = self.state
    return current == CircuitState.CLOSED
470
+
471
@property
def is_open(self) -> bool:
    """Return True while the circuit is open (rejecting requests)."""
    current = self.state
    return current == CircuitState.OPEN
475
+
476
def can_execute(self) -> bool:
    """Decide whether a call may proceed right now.

    A call admitted while half-open counts against
    ``config.half_open_max_calls``.

    Returns:
        True if execution should proceed
    """
    with self._lock:
        self._check_state_transition()
        current = self._state

        if current == CircuitState.CLOSED:
            return True

        if current != CircuitState.HALF_OPEN:
            # OPEN state
            return False

        # HALF_OPEN: admit calls only until the trial quota is used up.
        if self._half_open_calls >= self.config.half_open_max_calls:
            return False
        self._half_open_calls += 1
        return True
496
+
497
def record_success(self) -> None:
    """Register a successful call and update counters/state accordingly."""
    with self._lock:
        current = self._state

        if current == CircuitState.CLOSED:
            # A success while closed wipes the running failure streak.
            self._failure_count = 0
            return

        if current != CircuitState.HALF_OPEN:
            return

        # HALF_OPEN: enough successful probes close the circuit again.
        self._success_count += 1
        if self._success_count >= self.config.success_threshold:
            self._close()
507
+
508
def record_failure(self) -> None:
    """Register a failed call, opening the circuit when warranted."""
    with self._lock:
        # The timestamp is refreshed on every failure, whatever the state.
        self._last_failure_time = time.time()
        current = self._state

        if current == CircuitState.HALF_OPEN:
            # Any failed probe sends the breaker straight back to OPEN.
            self._open()
            return

        if current != CircuitState.CLOSED:
            return

        self._failure_count += 1
        if self._failure_count >= self.config.failure_threshold:
            self._open()
519
+
520
def reset(self) -> None:
    """Force the breaker back into the closed state."""
    self._lock.acquire()
    try:
        self._close()
    finally:
        self._lock.release()
524
+
525
def _check_state_transition(self) -> None:
    """Move OPEN -> HALF_OPEN once ``config.timeout_seconds`` has elapsed.

    Callers in this class invoke it while holding ``self._lock``.
    """
    if self._state != CircuitState.OPEN:
        return
    if self._last_failure_time is None:
        return

    # Time is measured from the most recent recorded failure.
    waited = time.time() - self._last_failure_time
    if waited >= self.config.timeout_seconds:
        self._half_open()
532
+
533
def _open(self) -> None:
    """Switch to OPEN, zero the failure/success counters and log a warning."""
    self._failure_count = self._success_count = 0
    self._state = CircuitState.OPEN
    logger.warning(f"Circuit breaker '{self.name}' opened")
539
+
540
def _close(self) -> None:
    """Switch to CLOSED, zero all counters and log the transition."""
    self._failure_count = self._success_count = self._half_open_calls = 0
    self._state = CircuitState.CLOSED
    logger.info(f"Circuit breaker '{self.name}' closed")
547
+
548
def _half_open(self) -> None:
    """Switch to HALF_OPEN with fresh probe counters and log the transition."""
    # The failure count is intentionally left untouched here.
    self._success_count = self._half_open_calls = 0
    self._state = CircuitState.HALF_OPEN
    logger.info(f"Circuit breaker '{self.name}' half-open")
554
+
555
def get_stats(self) -> dict[str, Any]:
    """Return a snapshot of the breaker's name, state and counters.

    The stored state is reported as-is; no pending transition is applied.
    """
    with self._lock:
        snapshot: dict[str, Any] = {"name": self.name}
        snapshot["state"] = self._state.value
        snapshot["failure_count"] = self._failure_count
        snapshot["success_count"] = self._success_count
        snapshot["last_failure"] = self._last_failure_time
        return snapshot
565
+
566
+
567
class CircuitBreakerRegistry:
    """Name-keyed collection of circuit breakers, created lazily on demand."""

    def __init__(self) -> None:
        self._lock = threading.RLock()
        self._breakers: dict[str, CircuitBreaker] = {}

    def get(
        self,
        name: str,
        config: CircuitBreakerConfig | None = None,
    ) -> CircuitBreaker:
        """Return the breaker registered under ``name``, creating it if absent.

        ``config`` is only used when a new breaker is created; an existing
        breaker is returned unchanged.
        """
        with self._lock:
            if name in self._breakers:
                return self._breakers[name]
            created = CircuitBreaker(name, config)
            self._breakers[name] = created
            return created

    def reset_all(self) -> None:
        """Call ``reset()`` on every registered breaker."""
        with self._lock:
            for registered in self._breakers.values():
                registered.reset()
590
+
591
+
592
# Global registry: a single module-level CircuitBreakerRegistry created at
# import time.
circuit_breaker_registry = CircuitBreakerRegistry()
594
+
595
+
596
+ # =============================================================================
597
+ # Execution Strategy Protocol
598
+ # =============================================================================
599
+
600
+
601
class ExecutionStrategy(ABC):
    """Interface every execution backend implements.

    A strategy decides how a zero-argument callable is run under a timeout
    and reports the outcome as an ``ExecutionResult``.
    """

    # Backend identifier; concrete strategies assign it as a class attribute.
    name: ExecutionBackend

    @abstractmethod
    def execute(
        self,
        func: Callable[[], T],
        timeout_seconds: float,
        **kwargs: Any,
    ) -> ExecutionResult[T]:
        """Run ``func`` under a timeout.

        Args:
            func: Zero-argument callable to run.
            timeout_seconds: Timeout in seconds.
            **kwargs: Strategy-specific options.

        Returns:
            The execution result.
        """

    @abstractmethod
    def is_available(self) -> bool:
        """Tell whether this strategy can be used."""
632
+
633
+
634
+ # =============================================================================
635
+ # Thread-Based Strategy
636
+ # =============================================================================
637
+
638
+
639
class ThreadExecutionStrategy(ExecutionStrategy):
    """Thread-based execution strategy.

    Fast with low overhead but cannot interrupt native code.
    Best for pure Python operations.

    On timeout the worker thread is abandoned, not killed: the function
    keeps running in the background until it finishes on its own.
    """

    name = ExecutionBackend.THREAD

    def __init__(self, max_workers: int = 1):
        """Store the pool size used for each ``execute`` call.

        Args:
            max_workers: ``max_workers`` passed to ``ThreadPoolExecutor``.
        """
        self.max_workers = max_workers

    def execute(
        self,
        func: Callable[[], T],
        timeout_seconds: float,
        **kwargs: Any,
    ) -> ExecutionResult[T]:
        """Run ``func`` in a worker thread and wait at most ``timeout_seconds``.

        Args:
            func: Zero-argument callable to run.
            timeout_seconds: Maximum time to wait for the result.
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``ExecutionResult.ok`` with the value, ``ExecutionResult.timeout``
            when the wait expires, or ``ExecutionResult.failure`` wrapping an
            exception raised by ``func``.
        """
        metrics = ExecutionMetrics(backend_used=self.name)

        # Deliberately NOT a ``with`` block: leaving the context manager calls
        # ``shutdown(wait=True)``, which would block until ``func`` finishes
        # and make the timeout ineffective.
        executor = ThreadPoolExecutor(max_workers=self.max_workers)
        try:
            future: Future[T] = executor.submit(func)

            try:
                value = future.result(timeout=timeout_seconds)
                return ExecutionResult.ok(value, metrics)

            except FuturesTimeoutError:
                metrics.was_terminated = True
                # Note: Cannot actually terminate the thread; cancel() only
                # helps if the task has not started running yet.
                future.cancel()
                return ExecutionResult.timeout(metrics)

            except Exception as e:
                return ExecutionResult.failure(e, metrics)
        finally:
            # Release the pool without waiting for a still-running worker.
            executor.shutdown(wait=False)

    def is_available(self) -> bool:
        """Threads are always usable."""
        return True
677
+
678
+
679
+ # =============================================================================
680
+ # Process-Based Strategy
681
+ # =============================================================================
682
+
683
+
684
def _process_worker(
    func_pickle: bytes,
    result_queue: Queue,
    ready_event: mp.Event,
) -> None:
    """Entry point executed inside the child process.

    Signals readiness, unpickles and runs the callable, then reports exactly
    one 3-tuple on ``result_queue``: ``("success", value, None)`` or
    ``("error", None, (exception_type_name, message, traceback_text))``.

    Args:
        func_pickle: Pickled zero-argument callable.
        result_queue: Queue the outcome tuple is put on.
        ready_event: Event set as soon as the worker starts.
    """
    try:
        # Tell the parent we are up before doing any real work.
        ready_event.set()

        # func_pickle is produced by the parent process in this module.
        target = pickle.loads(func_pickle)
        outcome = ("success", target(), None)
        result_queue.put(outcome)

    except Exception as exc:
        trace_text = traceback.format_exc()
        details = (type(exc).__name__, str(exc), trace_text)
        result_queue.put(("error", None, details))
713
+
714
+
715
class ProcessExecutionStrategy(ExecutionStrategy):
    """Process-based execution strategy.

    Uses separate process for reliable termination.
    Has serialization overhead but can terminate any code.

    The callable is shipped to the child with ``pickle``, so it must be
    picklable; otherwise ``execute`` returns a failure result.
    """

    name = ExecutionBackend.PROCESS

    def __init__(
        self,
        graceful_timeout: float = 2.0,
        start_method: str | None = None,
    ):
        """Initialize process strategy.

        Args:
            graceful_timeout: Seconds to wait after ``terminate()`` before
                the child is force-killed.
            start_method: Process start method (spawn, fork, forkserver);
                a platform default is chosen when omitted.
        """
        self.graceful_timeout = graceful_timeout
        self.start_method = start_method or self._get_default_start_method()

    def _get_default_start_method(self) -> str:
        """Get default start method for platform."""
        if sys.platform in ("darwin", "win32"):
            return "spawn"  # fork is problematic on macOS, absent on Windows
        return "fork"  # Faster on Linux

    def execute(
        self,
        func: Callable[[], T],
        timeout_seconds: float,
        **kwargs: Any,
    ) -> ExecutionResult[T]:
        """Run ``func`` in a child process and wait at most ``timeout_seconds``.

        Args:
            func: Zero-argument, picklable callable.
            timeout_seconds: Maximum time to wait for the child's result
                (counted after the child has signalled readiness).
            **kwargs: Accepted for interface compatibility; unused here.

        Returns:
            ``ok`` with the value, ``timeout`` if the child did not become
            ready within 5 seconds or did not answer in time, or ``failure``
            (serialization problem, or a ``RuntimeError`` describing the
            exception raised in the child).
        """
        metrics = ExecutionMetrics(backend_used=self.name)

        # Serialize function
        try:
            func_pickle = pickle.dumps(func)
        except Exception as e:
            return ExecutionResult.failure(
                ValueError(f"Cannot serialize function: {e}"),
                metrics,
            )

        # Create communication primitives
        ctx = mp.get_context(self.start_method)
        result_queue: Queue = ctx.Queue()
        ready_event = ctx.Event()

        # Start process
        process = ctx.Process(
            target=_process_worker,
            args=(func_pickle, result_queue, ready_event),
        )
        process.start()

        delivered = False
        try:
            # Wait for process to be ready
            if not ready_event.wait(timeout=5.0):
                self._terminate_process(process, metrics)
                return ExecutionResult.timeout(metrics)

            # Wait for result with timeout
            try:
                status, value, error_info = result_queue.get(timeout=timeout_seconds)
            except queue.Empty:
                # Timeout
                self._terminate_process(process, metrics)
                return ExecutionResult.timeout(metrics)

            delivered = True
            if status == "success":
                return ExecutionResult.ok(value, metrics)

            error_type, error_msg, _tb = error_info
            error = RuntimeError(f"{error_type}: {error_msg}")
            return ExecutionResult.failure(error, metrics)

        finally:
            if delivered:
                # The child has just handed over its result and is normally
                # still exiting. Let it finish on its own so a successful run
                # is not signalled and flagged as terminated.
                process.join(timeout=self.graceful_timeout)
            # Ensure process is cleaned up
            if process.is_alive():
                self._terminate_process(process, metrics)
            process.join(timeout=1.0)

    def _terminate_process(
        self,
        process: mp.Process,
        metrics: ExecutionMetrics,
    ) -> None:
        """Terminate a process gracefully then forcefully.

        Marks ``metrics.was_terminated`` and records which method was used.
        """
        metrics.was_terminated = True

        if not process.is_alive():
            return

        # Try graceful termination
        process.terminate()
        metrics.termination_method = TerminationMethod.GRACEFUL

        # Wait for graceful exit
        process.join(timeout=self.graceful_timeout)

        # Force kill if still alive
        if process.is_alive():
            try:
                # Process.kill() is portable; signal.SIGKILL does not exist
                # on Windows, so os.kill(pid, SIGKILL) would raise there.
                process.kill()
                metrics.termination_method = TerminationMethod.IMMEDIATE
            except OSError:
                # Best effort: the process may have exited in the meantime.
                pass

            process.join(timeout=1.0)

    def is_available(self) -> bool:
        """Check if multiprocessing is available."""
        try:
            mp.get_context(self.start_method)
        except Exception:
            # Availability probe: any failure just means "not usable".
            return False
        return True
840
+
841
+
842
+ # =============================================================================
843
+ # Adaptive Strategy
844
+ # =============================================================================
845
+
846
+
847
class AdaptiveExecutionStrategy(ExecutionStrategy):
    """Strategy that picks thread or process execution per call.

    The choice is driven by a complexity estimate for the operation.
    """

    name = ExecutionBackend.ADAPTIVE

    def __init__(
        self,
        estimator: ComplexityEstimator | None = None,
        thread_threshold_seconds: float = 5.0,
    ):
        """Set up the estimator and the two delegate strategies.

        Args:
            estimator: Complexity estimator; the module default is used when
                omitted.
            thread_threshold_seconds: Estimated durations above this value
                are sent to the process strategy.
        """
        self._thread_strategy = ThreadExecutionStrategy()
        self._process_strategy = ProcessExecutionStrategy()
        self.thread_threshold = thread_threshold_seconds
        self.estimator = estimator or default_complexity_estimator

    def execute(
        self,
        func: Callable[[], T],
        timeout_seconds: float,
        operation_type: str = "default",
        data_size: int = 0,
        **kwargs: Any,
    ) -> ExecutionResult[T]:
        """Estimate the operation, pick a backend, run it, record the timing."""
        estimate = self.estimator.estimate(
            operation_type=operation_type,
            data_size=data_size,
        )

        # Process isolation wins if the estimator asks for it or the
        # estimated duration exceeds the thread threshold.
        needs_process = (
            estimate.recommendation == ExecutionBackend.PROCESS
            or estimate.estimated_time_seconds > self.thread_threshold
        )
        strategy = self._process_strategy if needs_process else self._thread_strategy

        logger.debug(
            f"Adaptive strategy selected {strategy.name.value} for "
            f"{operation_type} ({data_size} rows, "
            f"est. {estimate.estimated_time_seconds:.2f}s)"
        )

        outcome = strategy.execute(func, timeout_seconds, **kwargs)

        # Only the default estimator is fed the measured time.
        if isinstance(self.estimator, DefaultComplexityEstimator):
            self.estimator.record_actual(
                operation_type,
                data_size,
                outcome.metrics.elapsed_seconds,
            )

        return outcome

    def is_available(self) -> bool:
        """Available whenever the thread strategy is."""
        return self._thread_strategy.is_available()
915
+
916
+
917
+ # =============================================================================
918
+ # Inline Strategy (No Isolation)
919
+ # =============================================================================
920
+
921
+
922
class InlineExecutionStrategy(ExecutionStrategy):
    """Run the callable directly in the calling thread.

    Intended for debugging and trusted operations. ``timeout_seconds`` is
    accepted but ignored, so there is no timeout protection.
    """

    name = ExecutionBackend.INLINE

    def execute(
        self,
        func: Callable[[], T],
        timeout_seconds: float,
        **kwargs: Any,
    ) -> ExecutionResult[T]:
        """Call ``func`` in place and wrap its value or exception."""
        metrics = ExecutionMetrics(backend_used=self.name)

        try:
            outcome = ExecutionResult.ok(func(), metrics)
        except Exception as exc:
            outcome = ExecutionResult.failure(exc, metrics)
        return outcome

    def is_available(self) -> bool:
        """Inline execution is always possible."""
        return True
948
+
949
+
950
+ # =============================================================================
951
+ # Strategy Registry
952
+ # =============================================================================
953
+
954
+
955
class ExecutionStrategyRegistry:
    """Lookup table from backend identifier to strategy instance."""

    def __init__(self) -> None:
        self._strategies: dict[ExecutionBackend, ExecutionStrategy] = {}
        self._register_defaults()

    def _register_defaults(self) -> None:
        """Register one instance of each built-in strategy."""
        builtin_strategies = (
            ThreadExecutionStrategy,
            ProcessExecutionStrategy,
            AdaptiveExecutionStrategy,
            InlineExecutionStrategy,
        )
        for strategy_cls in builtin_strategies:
            self.register(strategy_cls())

    def register(self, strategy: ExecutionStrategy) -> None:
        """Store ``strategy`` under its ``name``, replacing any previous one."""
        self._strategies[strategy.name] = strategy

    def get(self, backend: ExecutionBackend) -> ExecutionStrategy:
        """Return the strategy for ``backend``.

        Raises:
            KeyError: If no strategy is registered for ``backend``.
        """
        try:
            return self._strategies[backend]
        except KeyError:
            raise KeyError(f"Unknown backend: {backend}") from None

    def get_available(self) -> list[ExecutionBackend]:
        """List the backends whose strategy reports itself available."""
        usable: list[ExecutionBackend] = []
        for backend, strategy in self._strategies.items():
            if strategy.is_available():
                usable.append(backend)
        return usable
986
+
987
+
988
# Global registry: module-level ExecutionStrategyRegistry created at import
# time; its constructor registers the built-in strategies.
execution_strategy_registry = ExecutionStrategyRegistry()
990
+
991
+
992
+ # =============================================================================
993
+ # Resource Monitor
994
+ # =============================================================================
995
+
996
+
997
@dataclass
class ResourceLimits:
    """Upper bounds checked before an operation is executed.

    A value of 0 means "unlimited" for that resource.

    Attributes:
        max_memory_mb: Memory ceiling in megabytes.
        max_cpu_percent: CPU usage ceiling in percent.
        max_open_files: Ceiling on open file descriptors.
    """

    # 0 = unlimited
    max_memory_mb: float = 0
    # 0 = unlimited
    max_cpu_percent: float = 0
    # 0 = unlimited
    max_open_files: int = 0
1010
+
1011
+
1012
@dataclass
class ResourceUsage:
    """Snapshot of resource consumption; every field defaults to zero.

    Attributes:
        memory_mb: Memory in use, in megabytes.
        cpu_percent: CPU usage in percent.
        open_files: Number of open files.
    """

    memory_mb: float = 0.0
    cpu_percent: float = 0.0
    open_files: int = 0
1019
+
1020
+
1021
class ResourceMonitor:
    """Monitors resource usage of the current process.

    Uses psutil if available; without it every reading is an all-zero
    ``ResourceUsage``.
    """

    def __init__(self) -> None:
        self._psutil_available = self._check_psutil()
        # Cached psutil.Process handle. psutil's cpu_percent() compares
        # against the previous call on the SAME handle, so building a new
        # handle per reading would always yield 0.0.
        self._process: Any = None

    def _check_psutil(self) -> bool:
        """Check if psutil is available."""
        try:
            import psutil  # noqa: F401
        except ImportError:
            return False
        return True

    def _get_process(self) -> Any:
        """Return a psutil handle for the current process, cached per pid."""
        import psutil

        cached = getattr(self, "_process", None)
        # Re-create after fork so a child never reports its parent's usage.
        if cached is None or cached.pid != os.getpid():
            cached = psutil.Process()
            self._process = cached
        return cached

    def get_current_usage(self) -> ResourceUsage:
        """Get current resource usage.

        Returns an all-zero ``ResourceUsage`` when psutil is missing or the
        reading fails. The first CPU reading on a handle is 0.0; later
        readings report usage since the previous call.
        """
        if not self._psutil_available:
            return ResourceUsage()

        try:
            process = self._get_process()

            memory_info = process.memory_info()
            memory_mb = memory_info.rss / (1024 * 1024)

            cpu_percent = process.cpu_percent()

            try:
                open_files = len(process.open_files())
            except Exception:
                # Listing open files can be denied; treat as unknown (0).
                open_files = 0

            return ResourceUsage(
                memory_mb=memory_mb,
                cpu_percent=cpu_percent,
                open_files=open_files,
            )
        except Exception:
            # Monitoring is best effort and must never break execution.
            return ResourceUsage()

    def exceeds_limits(
        self,
        usage: ResourceUsage,
        limits: ResourceLimits,
    ) -> tuple[bool, str]:
        """Check if usage exceeds limits.

        Limits of 0 are skipped. Checks run in the order memory, CPU, open
        files, and the first violation is reported.

        Returns:
            Tuple of (exceeds, reason); reason is "" when nothing is exceeded.
        """
        if limits.max_memory_mb > 0 and usage.memory_mb > limits.max_memory_mb:
            return True, f"Memory usage {usage.memory_mb:.1f}MB exceeds limit {limits.max_memory_mb:.1f}MB"

        if limits.max_cpu_percent > 0 and usage.cpu_percent > limits.max_cpu_percent:
            return True, f"CPU usage {usage.cpu_percent:.1f}% exceeds limit {limits.max_cpu_percent:.1f}%"

        if limits.max_open_files > 0 and usage.open_files > limits.max_open_files:
            return True, f"Open files {usage.open_files} exceeds limit {limits.max_open_files}"

        return False, ""
1085
+
1086
+
1087
# Global monitor: module-level ResourceMonitor created at import time and
# used by ProcessTimeoutExecutor.execute for its pre-execution resource check.
resource_monitor = ResourceMonitor()
1089
+
1090
+
1091
+ # =============================================================================
1092
+ # Process Timeout Executor (Main Interface)
1093
+ # =============================================================================
1094
+
1095
+
1096
@dataclass
class ProcessTimeoutConfig:
    """Settings consumed by ``ProcessTimeoutExecutor``.

    Attributes:
        default_timeout_seconds: Timeout used when a call gives none.
        default_backend: Backend used when a call gives none.
        enable_circuit_breaker: Guard each operation with a circuit breaker.
        enable_complexity_estimation: Estimate complexity before hinted runs.
        enable_resource_monitoring: Check resource limits before running.
        resource_limits: Limits applied by the resource check.
        graceful_termination_seconds: Time before force kill.
        max_retries: Maximum retry attempts.
        retry_backoff_factor: Multiplier applied to the timeout per retry.
    """

    default_timeout_seconds: float = 60.0
    default_backend: ExecutionBackend = ExecutionBackend.ADAPTIVE
    enable_circuit_breaker: bool = True
    enable_complexity_estimation: bool = True
    enable_resource_monitoring: bool = True
    resource_limits: ResourceLimits = field(default_factory=ResourceLimits)
    graceful_termination_seconds: float = 2.0
    max_retries: int = 0
    retry_backoff_factor: float = 2.0

    @classmethod
    def strict(cls) -> "ProcessTimeoutConfig":
        """Preset: 30s timeout, breaker on, 1000 MB memory cap, no retries."""
        memory_cap = ResourceLimits(max_memory_mb=1000)
        return cls(
            default_timeout_seconds=30.0,
            enable_circuit_breaker=True,
            resource_limits=memory_cap,
            max_retries=0,
        )

    @classmethod
    def lenient(cls) -> "ProcessTimeoutConfig":
        """Preset: 300s timeout, breaker off, up to 2 retries."""
        overrides = {
            "default_timeout_seconds": 300.0,
            "enable_circuit_breaker": False,
            "max_retries": 2,
        }
        return cls(**overrides)

    @classmethod
    def fast(cls) -> "ProcessTimeoutConfig":
        """Preset: 10s timeout on the thread backend, no estimation."""
        overrides = {
            "default_timeout_seconds": 10.0,
            "default_backend": ExecutionBackend.THREAD,
            "enable_complexity_estimation": False,
        }
        return cls(**overrides)

    @classmethod
    def safe(cls) -> "ProcessTimeoutConfig":
        """Preset: 60s timeout on the process backend, breaker and estimation on."""
        overrides = {
            "default_timeout_seconds": 60.0,
            "default_backend": ExecutionBackend.PROCESS,
            "enable_circuit_breaker": True,
            "enable_complexity_estimation": True,
        }
        return cls(**overrides)
1159
+
1160
+
1161
class ProcessTimeoutExecutor:
    """Enterprise-grade timeout executor with process isolation.

    This is the main interface for executing operations with reliable
    timeout control, including native code like Polars.

    Features:
    - Process isolation for reliable termination
    - Pre-execution complexity estimation
    - Circuit breaker for cascade prevention
    - Resource monitoring
    - Retry with backoff

    Example:
        # Basic usage
        executor = ProcessTimeoutExecutor()
        result = executor.execute(
            lambda: expensive_operation(),
            timeout_seconds=30,
        )

        if result.success:
            print(result.value)
        elif result.timed_out:
            print("Operation timed out")
        else:
            print(f"Error: {result.error}")

        # With hints for better execution
        result = executor.execute_with_hints(
            lambda: profile_column(df, "email"),
            timeout_seconds=60,
            operation_type="profile_column",
            data_size=1_000_000,
        )
    """

    def __init__(
        self,
        config: ProcessTimeoutConfig | None = None,
        estimator: ComplexityEstimator | None = None,
    ):
        """Initialize executor.

        Args:
            config: Executor configuration; defaults to ``ProcessTimeoutConfig()``.
            estimator: Complexity estimator; defaults to the module default.
        """
        self.config = config or ProcessTimeoutConfig()
        self.estimator = estimator or default_complexity_estimator
        self._circuit_breakers: dict[str, CircuitBreaker] = {}
        # Guards every access to ``_circuit_breakers``.
        self._lock = threading.RLock()

    def execute(
        self,
        func: Callable[[], T],
        timeout_seconds: float | None = None,
        backend: ExecutionBackend | None = None,
        operation_name: str = "operation",
    ) -> ExecutionResult[T]:
        """Execute function with timeout.

        Order of checks: resource limits, then circuit breaker, then the
        actual (possibly retried) execution.

        Args:
            func: Function to execute
            timeout_seconds: Timeout; a falsy value (None or 0) selects the
                config default
            backend: Execution backend (uses config default if None)
            operation_name: Name for logging and circuit breaker

        Returns:
            Execution result. Rejections by the resource check or the circuit
            breaker are returned as failure results wrapping a ``RuntimeError``.
        """
        timeout = timeout_seconds or self.config.default_timeout_seconds
        backend = backend or self.config.default_backend

        # Check resources first. A rejection here must happen BEFORE the
        # breaker admits the call: can_execute() consumes a half-open probe
        # slot, and returning without recording an outcome would leak that
        # slot and could leave the breaker stuck in HALF_OPEN.
        if self.config.enable_resource_monitoring:
            usage = resource_monitor.get_current_usage()
            exceeds, reason = resource_monitor.exceeds_limits(
                usage, self.config.resource_limits
            )
            if exceeds:
                return ExecutionResult.failure(
                    RuntimeError(f"Resource limit exceeded: {reason}"),
                    ExecutionMetrics(),
                )

        # Check circuit breaker
        breaker: CircuitBreaker | None = None
        if self.config.enable_circuit_breaker:
            breaker = self._get_circuit_breaker(operation_name)
            if not breaker.can_execute():
                return ExecutionResult.failure(
                    RuntimeError(f"Circuit breaker open for '{operation_name}'"),
                    ExecutionMetrics(),
                )

        # Execute with retry
        result = self._execute_with_retry(func, timeout, backend, operation_name)

        # Update circuit breaker. Keyed on the breaker actually consulted, so
        # toggling the config mid-call cannot hit an unbound name.
        if breaker is not None:
            if result.success:
                breaker.record_success()
            else:
                breaker.record_failure()

        return result

    def execute_with_hints(
        self,
        func: Callable[[], T],
        timeout_seconds: float | None = None,
        operation_type: str = "default",
        data_size: int = 0,
        column_count: int = 1,
    ) -> ExecutionResult[T]:
        """Execute with complexity hints for better decisions.

        When complexity estimation is enabled, the estimator's recommended
        backend is used and a warning is logged if the estimate exceeds the
        timeout; the operation is still executed.

        Args:
            func: Function to execute
            timeout_seconds: Timeout
            operation_type: Type of operation (also the circuit breaker name)
            data_size: Number of rows
            column_count: Number of columns

        Returns:
            Execution result
        """
        timeout = timeout_seconds or self.config.default_timeout_seconds

        # Estimate complexity
        if self.config.enable_complexity_estimation:
            estimate = self.estimator.estimate(
                operation_type=operation_type,
                data_size=data_size,
                column_count=column_count,
            )

            # Check if operation will likely timeout
            if estimate.exceeds_timeout(timeout):
                logger.warning(
                    f"Operation '{operation_type}' estimated to take "
                    f"{estimate.estimated_time_seconds:.1f}s, exceeds timeout {timeout}s. "
                    f"Consider sampling to {estimate.max_safe_rows} rows."
                )

            # Use recommended backend
            backend = estimate.recommendation
        else:
            backend = self.config.default_backend

        return self.execute(
            func,
            timeout_seconds=timeout,
            backend=backend,
            operation_name=operation_type,
        )

    def execute_safe(
        self,
        func: Callable[[], T],
        timeout_seconds: float | None = None,
        default: T | None = None,
    ) -> T | None:
        """Execute and return default on failure.

        Args:
            func: Function to execute
            timeout_seconds: Timeout
            default: Default value on failure

        Returns:
            Result value or default
        """
        result = self.execute(func, timeout_seconds)
        if result.success:
            return result.value
        return default

    def _execute_with_retry(
        self,
        func: Callable[[], T],
        timeout: float,
        backend: ExecutionBackend,
        operation_name: str,
    ) -> ExecutionResult[T]:
        """Execute with retry logic.

        Only timeouts are retried, up to ``config.max_retries`` times; each
        retry multiplies the timeout by ``config.retry_backoff_factor``.
        """
        strategy = execution_strategy_registry.get(backend)
        retries = 0
        current_timeout = timeout

        while True:
            result = strategy.execute(func, current_timeout)
            result.metrics.retries = retries

            # Successes and non-timeout failures are final.
            if result.success or not result.timed_out:
                return result

            # Retry logic
            if retries >= self.config.max_retries:
                return result

            retries += 1
            current_timeout *= self.config.retry_backoff_factor

            logger.info(
                f"Retrying '{operation_name}' (attempt {retries + 1}/{self.config.max_retries + 1}), "
                f"timeout={current_timeout:.1f}s"
            )

    def _get_circuit_breaker(self, name: str) -> CircuitBreaker:
        """Get or create circuit breaker for operation."""
        with self._lock:
            if name not in self._circuit_breakers:
                self._circuit_breakers[name] = CircuitBreaker(name)
            return self._circuit_breakers[name]

    def get_stats(self) -> dict[str, Any]:
        """Get executor statistics.

        Returns:
            Dict with a ``config`` section (default timeout and backend value)
            and a ``circuit_breakers`` section mapping name to breaker stats.
        """
        # Hold the lock so a breaker created concurrently cannot change the
        # dict while it is being iterated.
        with self._lock:
            breaker_stats = {
                name: breaker.get_stats()
                for name, breaker in self._circuit_breakers.items()
            }

        return {
            "config": {
                "default_timeout": self.config.default_timeout_seconds,
                "default_backend": self.config.default_backend.value,
            },
            "circuit_breakers": breaker_stats,
        }

    def reset_circuit_breakers(self) -> None:
        """Reset all circuit breakers."""
        with self._lock:
            for breaker in self._circuit_breakers.values():
                breaker.reset()
1395
+
1396
+
1397
+ # =============================================================================
1398
+ # Convenience Functions
1399
+ # =============================================================================
1400
+
1401
+
1402
def with_process_timeout(
    func: Callable[[], T],
    timeout_seconds: float,
    default: T | None = None,
) -> T | None:
    """Run ``func`` once under a timeout, falling back to ``default``.

    A fresh ``ProcessTimeoutExecutor`` with default configuration is created
    for every call.

    Args:
        func: Function to execute
        timeout_seconds: Timeout in seconds
        default: Value to return on timeout/failure

    Returns:
        Function result or default

    Example:
        result = with_process_timeout(
            lambda: expensive_polars_operation(),
            timeout_seconds=30,
            default=None,
        )
    """
    one_shot = ProcessTimeoutExecutor()
    value = one_shot.execute_safe(func, timeout_seconds, default)
    return value
1428
+
1429
+
1430
def estimate_execution_time(
    operation_type: str,
    data_size: int,
    column_count: int = 1,
) -> ComplexityEstimate:
    """Ask the module's default estimator how long an operation may take.

    Args:
        operation_type: Type of operation
        data_size: Number of rows
        column_count: Number of columns

    Returns:
        Complexity estimate

    Example:
        estimate = estimate_execution_time("pattern_match", 1_000_000)
        print(f"Estimated time: {estimate.estimated_time_seconds:.1f}s")
        print(f"Recommended backend: {estimate.recommendation.value}")
    """
    hints = {
        "operation_type": operation_type,
        "data_size": data_size,
        "column_count": column_count,
    }
    return default_complexity_estimator.estimate(**hints)
1455
+
1456
+
1457
def create_timeout_executor(
    timeout_seconds: float = 60.0,
    backend: str = "adaptive",
    enable_circuit_breaker: bool = True,
) -> ProcessTimeoutExecutor:
    """Build a ``ProcessTimeoutExecutor`` from a few plain settings.

    Args:
        timeout_seconds: Default timeout
        backend: Execution backend value (thread, process, adaptive); it is
            converted with ``ExecutionBackend(backend)``
        enable_circuit_breaker: Enable circuit breaker

    Returns:
        Configured executor

    Example:
        executor = create_timeout_executor(
            timeout_seconds=30,
            backend="process",
        )
    """
    chosen_backend = ExecutionBackend(backend)
    settings = ProcessTimeoutConfig(
        default_timeout_seconds=timeout_seconds,
        default_backend=chosen_backend,
        enable_circuit_breaker=enable_circuit_breaker,
    )
    return ProcessTimeoutExecutor(settings)
1484
+
1485
+
1486
+ # =============================================================================
1487
+ # Context Manager
1488
+ # =============================================================================
1489
+
1490
+
1491
@contextmanager
def process_timeout_context(
    timeout_seconds: float,
    operation_name: str = "operation",
) -> Generator[ProcessTimeoutExecutor, None, None]:
    """Yield an executor whose default timeout is ``timeout_seconds``.

    No cleanup is performed on exit.

    Args:
        timeout_seconds: Timeout in seconds
        operation_name: Name for logging (currently not used by this function)

    Yields:
        Executor instance

    Example:
        with process_timeout_context(30.0, "profiling") as executor:
            result = executor.execute(lambda: profile(data))
    """
    settings = ProcessTimeoutConfig(default_timeout_seconds=timeout_seconds)
    yield ProcessTimeoutExecutor(settings)
1517
+
1518
+
1519
+ # =============================================================================
1520
+ # Decorator
1521
+ # =============================================================================
1522
+
1523
+
1524
def timeout_protected(
    timeout_seconds: float = 60.0,
    backend: ExecutionBackend = ExecutionBackend.ADAPTIVE,
    default: Any = None,
) -> Callable[[Callable[..., T]], Callable[..., T | None]]:
    """Build a decorator that runs the wrapped function under a timeout.

    One executor is created per decorated function and reused for all of its
    calls; the function's ``__name__`` is used as the operation name.

    Args:
        timeout_seconds: Timeout in seconds
        backend: Execution backend
        default: Value returned when the call does not succeed

    Returns:
        Decorated function

    Example:
        @timeout_protected(timeout_seconds=30)
        def expensive_operation(data):
            return process(data)

        result = expensive_operation(my_data)  # Will timeout after 30s
    """
    def decorator(func: Callable[..., T]) -> Callable[..., T | None]:
        settings = ProcessTimeoutConfig(
            default_timeout_seconds=timeout_seconds,
            default_backend=backend,
        )
        executor = ProcessTimeoutExecutor(settings)

        @functools.wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> T | None:
            outcome = executor.execute(
                lambda: func(*args, **kwargs),
                operation_name=func.__name__,
            )
            return outcome.value if outcome.success else default

        return wrapper

    return decorator