synth-ai 0.2.8.dev2__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (740)
  1. synth_ai/__init__.py +44 -24
  2. synth_ai/__main__.py +30 -3
  3. synth_ai/cli/__init__.py +103 -48
  4. synth_ai/cli/__main__.py +42 -0
  5. synth_ai/cli/_internal/__init__.py +5 -0
  6. synth_ai/cli/_internal/modal_wrapper.py +31 -0
  7. synth_ai/cli/_internal/storage.py +20 -0
  8. synth_ai/cli/_internal/typer_patch.py +47 -0
  9. synth_ai/cli/_internal/validate_task_app.py +29 -0
  10. synth_ai/cli/agents/__init__.py +17 -0
  11. synth_ai/cli/agents/claude.py +77 -0
  12. synth_ai/cli/agents/codex.py +265 -0
  13. synth_ai/cli/agents/opencode.py +253 -0
  14. synth_ai/cli/commands/__init__.py +18 -0
  15. synth_ai/cli/commands/artifacts/__init__.py +13 -0
  16. synth_ai/cli/commands/artifacts/client.py +119 -0
  17. synth_ai/cli/commands/artifacts/config.py +57 -0
  18. synth_ai/cli/commands/artifacts/core.py +24 -0
  19. synth_ai/cli/commands/artifacts/download.py +188 -0
  20. synth_ai/cli/commands/artifacts/export.py +186 -0
  21. synth_ai/cli/commands/artifacts/list.py +156 -0
  22. synth_ai/cli/commands/artifacts/parsing.py +250 -0
  23. synth_ai/cli/commands/artifacts/show.py +336 -0
  24. synth_ai/cli/commands/demo/__init__.py +3 -0
  25. synth_ai/cli/commands/demo/core.py +153 -0
  26. synth_ai/cli/commands/eval/__init__.py +10 -0
  27. synth_ai/cli/commands/eval/config.py +338 -0
  28. synth_ai/cli/commands/eval/core.py +256 -0
  29. synth_ai/cli/commands/eval/runner.py +704 -0
  30. synth_ai/cli/commands/eval/validation.py +60 -0
  31. synth_ai/cli/commands/filter/__init__.py +12 -0
  32. synth_ai/cli/commands/filter/core.py +424 -0
  33. synth_ai/cli/commands/filter/errors.py +55 -0
  34. synth_ai/cli/commands/filter/validation.py +77 -0
  35. synth_ai/cli/commands/help/__init__.py +185 -0
  36. synth_ai/cli/commands/help/core.py +72 -0
  37. synth_ai/cli/commands/scan/__init__.py +19 -0
  38. synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
  39. synth_ai/cli/commands/scan/core.py +344 -0
  40. synth_ai/cli/commands/scan/health_checker.py +242 -0
  41. synth_ai/cli/commands/scan/local_scanner.py +278 -0
  42. synth_ai/cli/commands/scan/models.py +83 -0
  43. synth_ai/cli/commands/smoke/__init__.py +7 -0
  44. synth_ai/cli/commands/smoke/core.py +1428 -0
  45. synth_ai/cli/commands/status/__init__.py +3 -0
  46. synth_ai/cli/commands/status/client.py +91 -0
  47. synth_ai/cli/commands/status/config.py +12 -0
  48. synth_ai/cli/commands/status/errors.py +11 -0
  49. synth_ai/cli/commands/status/subcommands/__init__.py +3 -0
  50. synth_ai/cli/commands/status/subcommands/config.py +13 -0
  51. synth_ai/cli/commands/status/subcommands/files.py +34 -0
  52. synth_ai/cli/commands/status/subcommands/jobs.py +51 -0
  53. synth_ai/cli/commands/status/subcommands/models.py +35 -0
  54. synth_ai/cli/commands/status/subcommands/runs.py +34 -0
  55. synth_ai/cli/commands/status/subcommands/session.py +77 -0
  56. synth_ai/cli/commands/status/subcommands/summary.py +39 -0
  57. synth_ai/cli/commands/status/subcommands/utils.py +41 -0
  58. synth_ai/cli/commands/status/utils.py +23 -0
  59. synth_ai/cli/commands/train/__init__.py +53 -0
  60. synth_ai/cli/commands/train/core.py +22 -0
  61. synth_ai/cli/commands/train/errors.py +117 -0
  62. synth_ai/cli/commands/train/judge_schemas.py +201 -0
  63. synth_ai/cli/commands/train/judge_validation.py +305 -0
  64. synth_ai/cli/commands/train/prompt_learning_validation.py +633 -0
  65. synth_ai/cli/commands/train/validation.py +392 -0
  66. synth_ai/cli/demo_apps/__init__.py +10 -0
  67. synth_ai/cli/demo_apps/core/__init__.py +28 -0
  68. synth_ai/{demos → cli/demo_apps}/core/cli.py +783 -441
  69. synth_ai/cli/demo_apps/crafter/__init__.py +1 -0
  70. synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
  71. synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
  72. synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
  73. synth_ai/cli/demo_apps/demo_registry.py +176 -0
  74. synth_ai/cli/demo_apps/demo_task_apps/__init__.py +7 -0
  75. synth_ai/{demos → cli/demo_apps}/demo_task_apps/core.py +75 -37
  76. synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
  77. synth_ai/cli/demo_apps/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
  78. synth_ai/cli/demo_apps/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
  79. synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
  80. synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/_common.py +1 -2
  81. synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +2 -1
  82. synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +73 -0
  83. synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +3 -6
  84. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +738 -0
  85. synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
  86. synth_ai/cli/demo_apps/math/__init__.py +1 -0
  87. synth_ai/cli/demo_apps/math/_common.py +16 -0
  88. synth_ai/cli/demo_apps/math/app.py +38 -0
  89. synth_ai/cli/demo_apps/math/config.toml +75 -0
  90. synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
  91. synth_ai/cli/demo_apps/math/modal_task_app.py +698 -0
  92. synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
  93. synth_ai/cli/demo_apps/mipro/main.py +271 -0
  94. synth_ai/cli/demo_apps/mipro/task_app.py +922 -0
  95. synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
  96. synth_ai/cli/demos/__init__.py +12 -0
  97. synth_ai/cli/demos/demo.py +32 -0
  98. synth_ai/cli/demos/rl_demo.py +254 -0
  99. synth_ai/cli/deploy.py +216 -0
  100. synth_ai/cli/infra/__init__.py +14 -0
  101. synth_ai/cli/{balance.py → infra/balance.py} +16 -4
  102. synth_ai/cli/infra/mcp.py +35 -0
  103. synth_ai/cli/infra/modal_app.py +36 -0
  104. synth_ai/cli/infra/setup.py +69 -0
  105. synth_ai/cli/infra/status.py +16 -0
  106. synth_ai/cli/infra/turso.py +77 -0
  107. synth_ai/cli/lib/__init__.py +10 -0
  108. synth_ai/cli/lib/agents.py +76 -0
  109. synth_ai/cli/lib/apps/modal_app.py +101 -0
  110. synth_ai/cli/lib/apps/task_app.py +642 -0
  111. synth_ai/cli/lib/bin.py +39 -0
  112. synth_ai/cli/lib/env.py +375 -0
  113. synth_ai/cli/lib/errors.py +85 -0
  114. synth_ai/cli/lib/modal.py +315 -0
  115. synth_ai/cli/lib/plotting.py +126 -0
  116. synth_ai/cli/lib/prompt_args.py +39 -0
  117. synth_ai/cli/lib/prompts.py +284 -0
  118. synth_ai/cli/lib/sqld.py +122 -0
  119. synth_ai/cli/lib/task_app_discovery.py +884 -0
  120. synth_ai/cli/lib/task_app_env.py +295 -0
  121. synth_ai/cli/lib/train_cfgs.py +300 -0
  122. synth_ai/cli/lib/tunnel_records.py +207 -0
  123. synth_ai/cli/local/__init__.py +14 -0
  124. synth_ai/cli/local/experiment_queue/__init__.py +72 -0
  125. synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
  126. synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
  127. synth_ai/cli/local/experiment_queue/config.py +128 -0
  128. synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
  129. synth_ai/cli/local/experiment_queue/database.py +175 -0
  130. synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
  131. synth_ai/cli/local/experiment_queue/models.py +231 -0
  132. synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
  133. synth_ai/cli/local/experiment_queue/results.py +373 -0
  134. synth_ai/cli/local/experiment_queue/schemas.py +131 -0
  135. synth_ai/cli/local/experiment_queue/service.py +344 -0
  136. synth_ai/cli/local/experiment_queue/status.py +372 -0
  137. synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
  138. synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
  139. synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
  140. synth_ai/cli/local/experiment_queue/validation.py +157 -0
  141. synth_ai/cli/local/session/__init__.py +92 -0
  142. synth_ai/cli/local/session/client.py +383 -0
  143. synth_ai/cli/local/session/constants.py +63 -0
  144. synth_ai/cli/local/session/exceptions.py +105 -0
  145. synth_ai/cli/local/session/manager.py +139 -0
  146. synth_ai/cli/local/session/models.py +89 -0
  147. synth_ai/cli/local/session/query.py +110 -0
  148. synth_ai/cli/root.py +150 -108
  149. synth_ai/cli/task_apps/__init__.py +37 -0
  150. synth_ai/cli/task_apps/commands.py +3145 -0
  151. synth_ai/cli/task_apps/deploy.py +7 -0
  152. synth_ai/cli/task_apps/list.py +26 -0
  153. synth_ai/cli/task_apps/main.py +36 -0
  154. synth_ai/cli/task_apps/modal_serve.py +11 -0
  155. synth_ai/cli/task_apps/serve.py +11 -0
  156. synth_ai/cli/training/__init__.py +8 -0
  157. synth_ai/cli/training/train.py +5 -0
  158. synth_ai/cli/training/train_cfg.py +34 -0
  159. synth_ai/cli/{watch.py → training/watch.py} +13 -18
  160. synth_ai/cli/turso.py +52 -0
  161. synth_ai/cli/utils/__init__.py +8 -0
  162. synth_ai/cli/utils/experiments.py +235 -0
  163. synth_ai/cli/utils/queue.py +504 -0
  164. synth_ai/cli/{recent.py → utils/recent.py} +13 -7
  165. synth_ai/cli/{traces.py → utils/traces.py} +9 -5
  166. synth_ai/contracts/__init__.py +67 -0
  167. synth_ai/core/__init__.py +100 -0
  168. synth_ai/core/_utils/__init__.py +54 -0
  169. synth_ai/core/_utils/base_url.py +10 -0
  170. synth_ai/core/_utils/http.py +10 -0
  171. synth_ai/core/_utils/prompts.py +14 -0
  172. synth_ai/core/_utils/task_app_state.py +12 -0
  173. synth_ai/core/_utils/user_config.py +10 -0
  174. synth_ai/core/apps/common.py +116 -0
  175. synth_ai/core/auth.py +95 -0
  176. synth_ai/core/cfgs.py +240 -0
  177. synth_ai/core/config/__init__.py +16 -0
  178. synth_ai/core/config/base.py +168 -0
  179. synth_ai/core/config/resolver.py +89 -0
  180. synth_ai/core/env.py +231 -0
  181. synth_ai/core/errors.py +126 -0
  182. synth_ai/core/http.py +230 -0
  183. synth_ai/core/integrations/__init__.py +11 -0
  184. synth_ai/core/integrations/cloudflare.py +1710 -0
  185. synth_ai/core/integrations/mcp/__init__.py +6 -0
  186. synth_ai/core/integrations/mcp/__main__.py +8 -0
  187. synth_ai/core/integrations/mcp/claude.py +36 -0
  188. synth_ai/core/integrations/mcp/main.py +254 -0
  189. synth_ai/core/integrations/mcp/setup.py +100 -0
  190. synth_ai/core/integrations/modal.py +277 -0
  191. synth_ai/core/json.py +72 -0
  192. synth_ai/core/log_filter.py +99 -0
  193. synth_ai/core/logging.py +82 -0
  194. synth_ai/core/paths.py +107 -0
  195. synth_ai/core/pricing.py +109 -0
  196. synth_ai/core/process.py +233 -0
  197. synth_ai/core/ssl.py +25 -0
  198. synth_ai/core/storage/__init__.py +71 -0
  199. synth_ai/core/task_app_state.py +318 -0
  200. synth_ai/core/telemetry.py +282 -0
  201. synth_ai/{tracing_v3 → core/tracing_v3}/__init__.py +5 -1
  202. synth_ai/{tracing_v3 → core/tracing_v3}/abstractions.py +21 -4
  203. synth_ai/core/tracing_v3/config.py +229 -0
  204. synth_ai/core/tracing_v3/constants.py +21 -0
  205. synth_ai/{tracing_v3 → core/tracing_v3}/db_config.py +42 -29
  206. synth_ai/{tracing_v3 → core/tracing_v3}/decorators.py +80 -45
  207. synth_ai/{tracing_v3 → core/tracing_v3}/examples/basic_usage.py +15 -9
  208. synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +6 -4
  209. synth_ai/{tracing_v3 → core/tracing_v3}/llm_call_record_helpers.py +161 -61
  210. synth_ai/{tracing_v3 → core/tracing_v3}/migration_helper.py +1 -2
  211. synth_ai/{tracing_v3 → core/tracing_v3}/replica_sync.py +12 -7
  212. synth_ai/core/tracing_v3/serialization.py +130 -0
  213. synth_ai/{tracing_v3 → core/tracing_v3}/session_tracer.py +88 -21
  214. synth_ai/{tracing_v3 → core/tracing_v3}/storage/base.py +99 -12
  215. synth_ai/core/tracing_v3/storage/config.py +109 -0
  216. synth_ai/{tracing_v3 → core/tracing_v3}/storage/factory.py +11 -9
  217. synth_ai/{tracing_v3 → core/tracing_v3}/storage/utils.py +15 -11
  218. synth_ai/core/tracing_v3/trace_utils.py +326 -0
  219. synth_ai/core/tracing_v3/turso/__init__.py +12 -0
  220. synth_ai/core/tracing_v3/turso/daemon.py +278 -0
  221. synth_ai/{tracing_v3 → core/tracing_v3}/turso/models.py +7 -3
  222. synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
  223. synth_ai/{tracing_v3 → core/tracing_v3}/utils.py +5 -4
  224. synth_ai/core/urls.py +18 -0
  225. synth_ai/core/user_config.py +137 -0
  226. synth_ai/core/uvicorn.py +222 -0
  227. synth_ai/data/__init__.py +83 -0
  228. synth_ai/data/enums.py +123 -0
  229. synth_ai/data/rewards.py +152 -0
  230. synth_ai/data/traces.py +35 -0
  231. synth_ai/products/__init__.py +6 -0
  232. synth_ai/products/graph_evolve/__init__.py +46 -0
  233. synth_ai/products/graph_evolve/client.py +226 -0
  234. synth_ai/products/graph_evolve/config.py +591 -0
  235. synth_ai/products/graph_evolve/converters/__init__.py +42 -0
  236. synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
  237. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
  238. synth_ai/products/graph_evolve/run.py +222 -0
  239. synth_ai/products/graph_gepa/__init__.py +23 -0
  240. synth_ai/products/graph_gepa/converters/__init__.py +19 -0
  241. synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
  242. synth_ai/sdk/__init__.py +123 -0
  243. synth_ai/sdk/api/__init__.py +1 -0
  244. synth_ai/sdk/api/models/supported.py +514 -0
  245. synth_ai/sdk/api/research_agent/__init__.py +296 -0
  246. synth_ai/sdk/api/train/__init__.py +85 -0
  247. synth_ai/sdk/api/train/builders.py +895 -0
  248. synth_ai/sdk/api/train/cli.py +2199 -0
  249. synth_ai/sdk/api/train/config_finder.py +267 -0
  250. synth_ai/sdk/api/train/configs/__init__.py +65 -0
  251. synth_ai/sdk/api/train/configs/prompt_learning.py +1706 -0
  252. synth_ai/sdk/api/train/configs/rl.py +187 -0
  253. synth_ai/sdk/api/train/configs/sft.py +99 -0
  254. synth_ai/sdk/api/train/configs/shared.py +81 -0
  255. synth_ai/sdk/api/train/context_learning.py +312 -0
  256. synth_ai/sdk/api/train/env_resolver.py +418 -0
  257. synth_ai/sdk/api/train/graph_validators.py +216 -0
  258. synth_ai/sdk/api/train/graphgen.py +984 -0
  259. synth_ai/sdk/api/train/graphgen_models.py +823 -0
  260. synth_ai/sdk/api/train/graphgen_validators.py +109 -0
  261. synth_ai/sdk/api/train/local_api.py +10 -0
  262. synth_ai/sdk/api/train/pollers.py +124 -0
  263. synth_ai/sdk/api/train/progress/__init__.py +97 -0
  264. synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
  265. synth_ai/sdk/api/train/progress/events.py +326 -0
  266. synth_ai/sdk/api/train/progress/results.py +428 -0
  267. synth_ai/sdk/api/train/progress/tracker.py +641 -0
  268. synth_ai/sdk/api/train/prompt_learning.py +469 -0
  269. synth_ai/sdk/api/train/rl.py +441 -0
  270. synth_ai/sdk/api/train/sft.py +396 -0
  271. synth_ai/sdk/api/train/summary.py +522 -0
  272. synth_ai/sdk/api/train/supported_algos.py +147 -0
  273. synth_ai/sdk/api/train/task_app.py +351 -0
  274. synth_ai/sdk/api/train/utils.py +279 -0
  275. synth_ai/sdk/api/train/validators.py +2424 -0
  276. synth_ai/sdk/graphs/__init__.py +15 -0
  277. synth_ai/sdk/graphs/completions.py +570 -0
  278. synth_ai/{inference → sdk/inference}/__init__.py +0 -1
  279. synth_ai/sdk/inference/client.py +128 -0
  280. synth_ai/sdk/jobs/__init__.py +16 -0
  281. synth_ai/sdk/jobs/client.py +371 -0
  282. synth_ai/sdk/judging/__init__.py +14 -0
  283. synth_ai/sdk/judging/base.py +24 -0
  284. synth_ai/sdk/judging/client.py +40 -0
  285. synth_ai/sdk/judging/schemas.py +222 -0
  286. synth_ai/sdk/judging/types.py +42 -0
  287. synth_ai/sdk/learning/__init__.py +99 -0
  288. synth_ai/sdk/learning/algorithms.py +14 -0
  289. synth_ai/{learning → sdk/learning}/client.py +121 -30
  290. synth_ai/sdk/learning/config.py +5 -0
  291. synth_ai/{learning → sdk/learning}/constants.py +0 -2
  292. synth_ai/sdk/learning/context_learning_client.py +531 -0
  293. synth_ai/sdk/learning/context_learning_types.py +292 -0
  294. synth_ai/sdk/learning/ft_client.py +7 -0
  295. synth_ai/{learning → sdk/learning}/health.py +15 -9
  296. synth_ai/{learning → sdk/learning}/jobs.py +44 -47
  297. synth_ai/sdk/learning/prompt_extraction.py +334 -0
  298. synth_ai/sdk/learning/prompt_learning_client.py +455 -0
  299. synth_ai/sdk/learning/prompt_learning_types.py +186 -0
  300. synth_ai/{rl → sdk/learning/rl}/__init__.py +13 -8
  301. synth_ai/{learning/rl_client.py → sdk/learning/rl/client.py} +89 -77
  302. synth_ai/sdk/learning/rl/config.py +31 -0
  303. synth_ai/{rl → sdk/learning/rl}/contracts.py +5 -14
  304. synth_ai/{rl → sdk/learning/rl}/env_keys.py +45 -16
  305. synth_ai/sdk/learning/rl/secrets.py +13 -0
  306. synth_ai/sdk/learning/rl_client.py +5 -0
  307. synth_ai/sdk/learning/sft/__init__.py +29 -0
  308. synth_ai/sdk/learning/sft/client.py +95 -0
  309. synth_ai/sdk/learning/sft/config.py +270 -0
  310. synth_ai/sdk/learning/sft/data.py +698 -0
  311. synth_ai/sdk/learning/sse.py +57 -0
  312. synth_ai/sdk/learning/validators.py +52 -0
  313. synth_ai/sdk/localapi/__init__.py +40 -0
  314. synth_ai/sdk/localapi/apps/__init__.py +28 -0
  315. synth_ai/sdk/localapi/client.py +10 -0
  316. synth_ai/sdk/localapi/contracts.py +10 -0
  317. synth_ai/sdk/localapi/helpers.py +519 -0
  318. synth_ai/sdk/localapi/rollouts.py +87 -0
  319. synth_ai/sdk/localapi/server.py +29 -0
  320. synth_ai/sdk/localapi/template.py +70 -0
  321. synth_ai/sdk/streaming/__init__.py +35 -0
  322. synth_ai/sdk/streaming/config.py +94 -0
  323. synth_ai/sdk/streaming/handlers.py +1997 -0
  324. synth_ai/sdk/streaming/streamer.py +713 -0
  325. synth_ai/sdk/streaming/types.py +112 -0
  326. synth_ai/sdk/task/__init__.py +164 -0
  327. synth_ai/sdk/task/apps/__init__.py +169 -0
  328. synth_ai/sdk/task/auth.py +165 -0
  329. synth_ai/sdk/task/client.py +175 -0
  330. synth_ai/sdk/task/config.py +257 -0
  331. synth_ai/sdk/task/contracts.py +219 -0
  332. synth_ai/sdk/task/datasets.py +108 -0
  333. synth_ai/sdk/task/errors.py +50 -0
  334. synth_ai/sdk/task/health.py +34 -0
  335. synth_ai/sdk/task/in_process.py +1190 -0
  336. synth_ai/sdk/task/in_process_runner.py +314 -0
  337. synth_ai/sdk/task/inference_api.py +299 -0
  338. synth_ai/sdk/task/json.py +111 -0
  339. synth_ai/sdk/task/proxy.py +287 -0
  340. synth_ai/sdk/task/rubrics/__init__.py +55 -0
  341. synth_ai/sdk/task/rubrics/loaders.py +156 -0
  342. synth_ai/sdk/task/rubrics/models.py +57 -0
  343. synth_ai/sdk/task/rubrics/scoring.py +116 -0
  344. synth_ai/sdk/task/rubrics/strict.py +149 -0
  345. synth_ai/sdk/task/rubrics.py +219 -0
  346. synth_ai/sdk/task/server.py +631 -0
  347. synth_ai/sdk/task/trace_correlation_helpers.py +539 -0
  348. synth_ai/sdk/task/tracing_utils.py +95 -0
  349. synth_ai/sdk/task/validators.py +441 -0
  350. synth_ai/sdk/task/vendors.py +59 -0
  351. synth_ai/sdk/training/__init__.py +102 -0
  352. synth_ai/sdk/tunnels/__init__.py +83 -0
  353. synth_ai/sdk/tunnels/cleanup.py +83 -0
  354. synth_ai/sdk/tunnels/ports.py +120 -0
  355. synth_ai/utils/__init__.py +213 -0
  356. synth_ai-0.4.3.dist-info/METADATA +262 -0
  357. synth_ai-0.4.3.dist-info/RECORD +370 -0
  358. {synth_ai-0.2.8.dev2.dist-info → synth_ai-0.4.3.dist-info}/entry_points.txt +0 -1
  359. synth_ai/cli/calc.py +0 -69
  360. synth_ai/cli/demo.py +0 -144
  361. synth_ai/cli/legacy_root_backup.py +0 -470
  362. synth_ai/cli/man.py +0 -106
  363. synth_ai/cli/rl_demo.py +0 -202
  364. synth_ai/cli/status.py +0 -133
  365. synth_ai/config/base_url.py +0 -107
  366. synth_ai/core/experiment.py +0 -15
  367. synth_ai/core/system.py +0 -15
  368. synth_ai/demos/core/__init__.py +0 -1
  369. synth_ai/demos/demo_task_apps/__init__.py +0 -1
  370. synth_ai/demos/demo_task_apps/math/config.toml +0 -129
  371. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
  372. synth_ai/demos/demo_task_apps/math/modal_task_app.py +0 -415
  373. synth_ai/environments/__init__.py +0 -31
  374. synth_ai/environments/environment/__init__.py +0 -1
  375. synth_ai/environments/environment/artifacts/__init__.py +0 -1
  376. synth_ai/environments/environment/artifacts/base.py +0 -52
  377. synth_ai/environments/environment/core.py +0 -67
  378. synth_ai/environments/environment/db/__init__.py +0 -1
  379. synth_ai/environments/environment/db/sqlite.py +0 -45
  380. synth_ai/environments/environment/registry.py +0 -233
  381. synth_ai/environments/environment/resources/sqlite.py +0 -45
  382. synth_ai/environments/environment/results.py +0 -1
  383. synth_ai/environments/environment/rewards/__init__.py +0 -1
  384. synth_ai/environments/environment/rewards/core.py +0 -29
  385. synth_ai/environments/environment/shared_engine.py +0 -26
  386. synth_ai/environments/environment/tools/__init__.py +0 -200
  387. synth_ai/environments/examples/__init__.py +0 -1
  388. synth_ai/environments/examples/bandit/__init__.py +0 -33
  389. synth_ai/environments/examples/bandit/engine.py +0 -294
  390. synth_ai/environments/examples/bandit/environment.py +0 -194
  391. synth_ai/environments/examples/bandit/taskset.py +0 -200
  392. synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
  393. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
  394. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
  395. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
  396. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
  397. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
  398. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
  399. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
  400. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  401. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
  402. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
  403. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
  404. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
  405. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
  406. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
  407. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
  408. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
  409. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
  410. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
  411. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
  412. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
  413. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  414. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
  415. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
  416. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
  417. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
  418. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
  419. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
  420. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
  421. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
  422. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
  423. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
  424. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
  425. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
  426. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
  427. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
  428. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
  429. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
  430. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
  431. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
  432. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
  433. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
  434. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
  435. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
  436. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
  437. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
  438. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
  439. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
  440. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
  441. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
  442. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
  443. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
  444. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
  445. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
  446. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
  447. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
  448. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
  449. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
  450. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
  451. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
  452. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
  453. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
  454. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
  455. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
  456. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
  457. synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
  458. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  459. synth_ai/environments/examples/crafter_classic/engine.py +0 -579
  460. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
  461. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
  462. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
  463. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
  464. synth_ai/environments/examples/crafter_classic/environment.py +0 -404
  465. synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
  466. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
  467. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
  468. synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
  469. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
  470. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
  471. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
  472. synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
  473. synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
  474. synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
  475. synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
  476. synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
  477. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
  478. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
  479. synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
  480. synth_ai/environments/examples/crafter_custom/environment.py +0 -312
  481. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
  482. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
  483. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
  484. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
  485. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
  486. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
  487. synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
  488. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
  489. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
  490. synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
  491. synth_ai/environments/examples/enron/engine.py +0 -295
  492. synth_ai/environments/examples/enron/environment.py +0 -166
  493. synth_ai/environments/examples/enron/taskset.py +0 -112
  494. synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
  495. synth_ai/environments/examples/minigrid/__init__.py +0 -48
  496. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
  497. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
  498. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
  499. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
  500. synth_ai/environments/examples/minigrid/engine.py +0 -589
  501. synth_ai/environments/examples/minigrid/environment.py +0 -274
  502. synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
  503. synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
  504. synth_ai/environments/examples/minigrid/taskset.py +0 -583
  505. synth_ai/environments/examples/nethack/__init__.py +0 -7
  506. synth_ai/environments/examples/nethack/achievements.py +0 -337
  507. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
  508. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
  509. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
  510. synth_ai/environments/examples/nethack/engine.py +0 -739
  511. synth_ai/environments/examples/nethack/environment.py +0 -256
  512. synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
  513. synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
  514. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
  515. synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
  516. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
  517. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
  518. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
  519. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
  520. synth_ai/environments/examples/nethack/taskset.py +0 -323
  521. synth_ai/environments/examples/red/__init__.py +0 -7
  522. synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
  523. synth_ai/environments/examples/red/config_logging.py +0 -110
  524. synth_ai/environments/examples/red/engine.py +0 -694
  525. synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
  526. synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -28
  527. synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
  528. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
  529. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
  530. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
  531. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
  532. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
  533. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
  534. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
  535. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
  536. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
  537. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
  538. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
  539. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
  540. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
  541. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -140
  542. synth_ai/environments/examples/red/environment.py +0 -238
  543. synth_ai/environments/examples/red/taskset.py +0 -79
  544. synth_ai/environments/examples/red/units/__init__.py +0 -1
  545. synth_ai/environments/examples/sokoban/__init__.py +0 -1
  546. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
  547. synth_ai/environments/examples/sokoban/engine.py +0 -678
  548. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
  549. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
  550. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
  551. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
  552. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
  553. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
  554. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
  555. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
  556. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
  557. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
  558. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
  559. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
  560. synth_ai/environments/examples/sokoban/environment.py +0 -229
  561. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
  562. synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
  563. synth_ai/environments/examples/sokoban/taskset.py +0 -428
  564. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  565. synth_ai/environments/examples/tictactoe/__init__.py +0 -1
  566. synth_ai/environments/examples/tictactoe/engine.py +0 -368
  567. synth_ai/environments/examples/tictactoe/environment.py +0 -240
  568. synth_ai/environments/examples/tictactoe/taskset.py +0 -215
  569. synth_ai/environments/examples/verilog/__init__.py +0 -10
  570. synth_ai/environments/examples/verilog/engine.py +0 -329
  571. synth_ai/environments/examples/verilog/environment.py +0 -350
  572. synth_ai/environments/examples/verilog/taskset.py +0 -420
  573. synth_ai/environments/examples/wordle/__init__.py +0 -29
  574. synth_ai/environments/examples/wordle/engine.py +0 -398
  575. synth_ai/environments/examples/wordle/environment.py +0 -159
  576. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
  577. synth_ai/environments/examples/wordle/taskset.py +0 -230
  578. synth_ai/environments/reproducibility/core.py +0 -42
  579. synth_ai/environments/reproducibility/helpers.py +0 -0
  580. synth_ai/environments/reproducibility/tree.py +0 -364
  581. synth_ai/environments/service/app.py +0 -98
  582. synth_ai/environments/service/core_routes.py +0 -1020
  583. synth_ai/environments/service/external_registry.py +0 -56
  584. synth_ai/environments/service/registry.py +0 -9
  585. synth_ai/environments/stateful/__init__.py +0 -1
  586. synth_ai/environments/stateful/core.py +0 -163
  587. synth_ai/environments/stateful/engine.py +0 -21
  588. synth_ai/environments/stateful/state.py +0 -7
  589. synth_ai/environments/tasks/api.py +0 -19
  590. synth_ai/environments/tasks/core.py +0 -80
  591. synth_ai/environments/tasks/filters.py +0 -41
  592. synth_ai/environments/tasks/utils.py +0 -91
  593. synth_ai/environments/v0_observability/history.py +0 -3
  594. synth_ai/environments/v0_observability/log.py +0 -2
  595. synth_ai/evals/base.py +0 -15
  596. synth_ai/experimental/synth_oss.py +0 -446
  597. synth_ai/handshake.py +0 -63
  598. synth_ai/http.py +0 -26
  599. synth_ai/http_client.py +0 -104
  600. synth_ai/inference/client.py +0 -20
  601. synth_ai/install_sqld.sh +0 -40
  602. synth_ai/jobs/client.py +0 -246
  603. synth_ai/learning/__init__.py +0 -24
  604. synth_ai/learning/config.py +0 -43
  605. synth_ai/learning/filtering.py +0 -0
  606. synth_ai/learning/ft_client.py +0 -59
  607. synth_ai/learning/offline/dpo.py +0 -0
  608. synth_ai/learning/offline/providers.py +0 -7
  609. synth_ai/learning/offline/sft.py +0 -0
  610. synth_ai/learning/offline/shared.py +0 -0
  611. synth_ai/learning/online/grpo.py +0 -0
  612. synth_ai/learning/online/irft.py +0 -0
  613. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  614. synth_ai/learning/prompts/gepa.py +0 -0
  615. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  616. synth_ai/learning/prompts/mipro.py +0 -289
  617. synth_ai/learning/prompts/random_search.py +0 -246
  618. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  619. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  620. synth_ai/learning/sse.py +0 -58
  621. synth_ai/learning/validators.py +0 -48
  622. synth_ai/lm/__init__.py +0 -51
  623. synth_ai/lm/caching/constants.py +0 -6
  624. synth_ai/lm/caching/dbs.py +0 -0
  625. synth_ai/lm/caching/ephemeral.py +0 -102
  626. synth_ai/lm/caching/handler.py +0 -137
  627. synth_ai/lm/caching/initialize.py +0 -11
  628. synth_ai/lm/caching/persistent.py +0 -114
  629. synth_ai/lm/config.py +0 -110
  630. synth_ai/lm/constants.py +0 -32
  631. synth_ai/lm/core/__init__.py +0 -8
  632. synth_ai/lm/core/all.py +0 -73
  633. synth_ai/lm/core/exceptions.py +0 -7
  634. synth_ai/lm/core/main.py +0 -319
  635. synth_ai/lm/core/main_v3.py +0 -594
  636. synth_ai/lm/core/synth_models.py +0 -48
  637. synth_ai/lm/core/vendor_clients.py +0 -188
  638. synth_ai/lm/cost/__init__.py +0 -0
  639. synth_ai/lm/cost/monitor.py +0 -1
  640. synth_ai/lm/cost/statefulness.py +0 -1
  641. synth_ai/lm/injection.py +0 -80
  642. synth_ai/lm/overrides.py +0 -206
  643. synth_ai/lm/provider_support/__init__.py +0 -8
  644. synth_ai/lm/provider_support/anthropic.py +0 -972
  645. synth_ai/lm/provider_support/openai.py +0 -1139
  646. synth_ai/lm/provider_support/suppress_logging.py +0 -31
  647. synth_ai/lm/structured_outputs/__init__.py +0 -0
  648. synth_ai/lm/structured_outputs/handler.py +0 -440
  649. synth_ai/lm/structured_outputs/inject.py +0 -297
  650. synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
  651. synth_ai/lm/tools/__init__.py +0 -3
  652. synth_ai/lm/tools/base.py +0 -172
  653. synth_ai/lm/unified_interface.py +0 -202
  654. synth_ai/lm/vendors/__init__.py +0 -0
  655. synth_ai/lm/vendors/base.py +0 -81
  656. synth_ai/lm/vendors/core/__init__.py +0 -0
  657. synth_ai/lm/vendors/core/anthropic_api.py +0 -387
  658. synth_ai/lm/vendors/core/gemini_api.py +0 -292
  659. synth_ai/lm/vendors/core/mistral_api.py +0 -322
  660. synth_ai/lm/vendors/core/openai_api.py +0 -225
  661. synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
  662. synth_ai/lm/vendors/local/__init__.py +0 -0
  663. synth_ai/lm/vendors/local/ollama.py +0 -0
  664. synth_ai/lm/vendors/openai_standard.py +0 -780
  665. synth_ai/lm/vendors/openai_standard_responses.py +0 -256
  666. synth_ai/lm/vendors/retries.py +0 -22
  667. synth_ai/lm/vendors/supported/__init__.py +0 -0
  668. synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
  669. synth_ai/lm/vendors/supported/deepseek.py +0 -69
  670. synth_ai/lm/vendors/supported/grok.py +0 -75
  671. synth_ai/lm/vendors/supported/groq.py +0 -16
  672. synth_ai/lm/vendors/supported/ollama.py +0 -15
  673. synth_ai/lm/vendors/supported/openrouter.py +0 -74
  674. synth_ai/lm/vendors/supported/together.py +0 -11
  675. synth_ai/lm/vendors/synth_client.py +0 -808
  676. synth_ai/lm/warmup.py +0 -186
  677. synth_ai/rl/secrets.py +0 -19
  678. synth_ai/scripts/verify_rewards.py +0 -100
  679. synth_ai/task/__init__.py +0 -10
  680. synth_ai/task/contracts.py +0 -120
  681. synth_ai/task/health.py +0 -28
  682. synth_ai/task/validators.py +0 -12
  683. synth_ai/tracing/__init__.py +0 -30
  684. synth_ai/tracing_v1/__init__.py +0 -33
  685. synth_ai/tracing_v3/config.py +0 -84
  686. synth_ai/tracing_v3/storage/config.py +0 -62
  687. synth_ai/tracing_v3/turso/__init__.py +0 -25
  688. synth_ai/tracing_v3/turso/daemon.py +0 -144
  689. synth_ai/tracing_v3/turso/manager.py +0 -760
  690. synth_ai/v0/tracing/__init__.py +0 -0
  691. synth_ai/v0/tracing/abstractions.py +0 -224
  692. synth_ai/v0/tracing/base_client.py +0 -91
  693. synth_ai/v0/tracing/client_manager.py +0 -131
  694. synth_ai/v0/tracing/config.py +0 -142
  695. synth_ai/v0/tracing/context.py +0 -146
  696. synth_ai/v0/tracing/decorators.py +0 -682
  697. synth_ai/v0/tracing/events/__init__.py +0 -0
  698. synth_ai/v0/tracing/events/manage.py +0 -147
  699. synth_ai/v0/tracing/events/scope.py +0 -86
  700. synth_ai/v0/tracing/events/store.py +0 -228
  701. synth_ai/v0/tracing/immediate_client.py +0 -151
  702. synth_ai/v0/tracing/local.py +0 -18
  703. synth_ai/v0/tracing/log_client_base.py +0 -73
  704. synth_ai/v0/tracing/retry_queue.py +0 -186
  705. synth_ai/v0/tracing/trackers.py +0 -515
  706. synth_ai/v0/tracing/upload.py +0 -512
  707. synth_ai/v0/tracing/utils.py +0 -9
  708. synth_ai/v0/tracing_v1/__init__.py +0 -16
  709. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  710. synth_ai/v0/tracing_v1/base_client.py +0 -91
  711. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  712. synth_ai/v0/tracing_v1/config.py +0 -142
  713. synth_ai/v0/tracing_v1/context.py +0 -146
  714. synth_ai/v0/tracing_v1/decorators.py +0 -703
  715. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  716. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  717. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  718. synth_ai/v0/tracing_v1/events/store.py +0 -228
  719. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  720. synth_ai/v0/tracing_v1/local.py +0 -18
  721. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  722. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  723. synth_ai/v0/tracing_v1/trackers.py +0 -515
  724. synth_ai/v0/tracing_v1/upload.py +0 -527
  725. synth_ai/v0/tracing_v1/utils.py +0 -9
  726. synth_ai/zyk/__init__.py +0 -30
  727. synth_ai-0.2.8.dev2.dist-info/METADATA +0 -129
  728. synth_ai-0.2.8.dev2.dist-info/RECORD +0 -420
  729. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
  730. /synth_ai/{lm/caching → core/apps}/__init__.py +0 -0
  731. /synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
  732. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
  733. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
  734. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
  735. /synth_ai/{compound/cais.py → py.typed} +0 -0
  736. /synth_ai/{learning → sdk/learning}/core.py +0 -0
  737. /synth_ai/{learning → sdk/learning}/gateway.py +0 -0
  738. {synth_ai-0.2.8.dev2.dist-info → synth_ai-0.4.3.dist-info}/WHEEL +0 -0
  739. {synth_ai-0.2.8.dev2.dist-info → synth_ai-0.4.3.dist-info}/licenses/LICENSE +0 -0
  740. {synth_ai-0.2.8.dev2.dist-info → synth_ai-0.4.3.dist-info}/top_level.txt +0 -0
synth_ai/cli/local/experiment_queue/tasks.py (new file; hunk truncated after line 348 of 1984)
@@ -0,0 +1,1984 @@
+"""Celery task definitions for running experiment jobs."""
+
+from __future__ import annotations
+
+import contextlib
+import os
+import re
+import shlex
+import subprocess
+import sys
+import threading
+import time
+from datetime import UTC, datetime
+from pathlib import Path
+from typing import Any
+
+from celery.utils.log import get_task_logger
+from dotenv import load_dotenv
+
+from .api_schemas import BackendEventsResponse
+from .celery_app import celery_app
+from .config import load_config
+from .config_utils import PreparedConfig, prepare_config_file
+from .database import session_scope
+from .dispatcher import dispatch_available_jobs
+from .models import (
+    Experiment,
+    ExperimentJob,
+    ExperimentJobStatus,
+    ExperimentStatus,
+    JobExecutionLog,
+)
+from .results import ResultSummary, collect_result_summary
+from .status import ExperimentStatusTracker
+from .status_tracker import extract_config_info, update_status_from_output
+from .trace_storage import persist_trials_from_summary, update_experiment_metadata
+
+logger = get_task_logger(__name__)
+
+
+TRAIN_COMMAND_ENV = "EXPERIMENT_QUEUE_TRAIN_CMD"
+
+
+def _load_synth_api_key() -> str:
+    """Load SYNTH_API_KEY from .env file and fail loudly if not found.
+
+    Never falls back to other sources - must be explicitly set in .env file.
+
+    Returns:
+        The API key as a string.
+
+    Raises:
+        RuntimeError: If SYNTH_API_KEY is not found in .env file.
+    """
+    # Find .env file - check synth-ai root first, then current directory
+    repo_root = Path(__file__).resolve().parents[3]  # synth_ai/experiment_queue/tasks.py -> synth-ai/
+    env_file = repo_root / ".env"
+
+    if not env_file.exists():
+        # Try current directory as fallback
+        env_file = Path(".env")
+
+    if env_file.exists():
+        load_dotenv(env_file, override=False)  # Don't override existing env vars
+
+    api_key = os.getenv("SYNTH_API_KEY")
+
+    if not api_key:
+        raise RuntimeError(
+            f"❌ SYNTH_API_KEY not found! "
+            f"Please set it in {env_file.resolve() if env_file.exists() else 'synth-ai/.env'}. "
+            f"No fallback - API key must be explicitly set."
+        )
+
+    return api_key
+
+
+def _find_venv_python() -> str:
+    """Find the venv Python executable to avoid uv cache permission issues.
+
+    Checks in order:
+    1. sys.executable if already in a venv
+    2. .venv/bin/python relative to current working directory
+    3. .venv/bin/python relative to repo root (if synth_ai package is installed)
+    4. Falls back to 'python' if venv not found
+    """
+    # If we're already running in a venv, use that
+    if sys.executable and ("venv" in sys.executable or ".venv" in sys.executable):
+        return sys.executable
+
+    # Check .venv/bin/python relative to current working directory
+    cwd_venv = Path.cwd() / ".venv" / "bin" / "python"
+    if cwd_venv.exists() and os.access(cwd_venv, os.X_OK):
+        return str(cwd_venv)
+
+    # Check .venv/bin/python relative to synth_ai package location
+    try:
+        import synth_ai
+
+        package_path = Path(synth_ai.__file__ or Path(__file__).resolve()).parent.parent.parent
+        pkg_venv = package_path / ".venv" / "bin" / "python"
+        if pkg_venv.exists() and os.access(pkg_venv, os.X_OK):
+            return str(pkg_venv)
+    except Exception:
+        pass
+
+    # Fallback to system python
+    return "python"
+
+
+def _get_default_train_cmd() -> str:
+    """Get the default training command, evaluating venv path lazily.
+
+    This is called when building the command, not at module import time,
+    so it can properly detect the venv based on the current working directory.
+    """
+    return f"{_find_venv_python()} -m synth_ai.cli train"
+
+
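`_get_default_train_cmd` builds the command string lazily so that venv detection reflects the worker's working directory, and `TRAIN_COMMAND_ENV` names an environment variable that presumably overrides it; the consuming code falls outside this excerpt. A self-contained sketch of that resolution order, with `shlex` (imported at the top of the module) splitting the string for `subprocess` — the function name and default here are illustrative, not part of the package:

    import os
    import shlex

    TRAIN_COMMAND_ENV = "EXPERIMENT_QUEUE_TRAIN_CMD"  # same constant as in the module

    def resolve_train_cmd(default: str = "python -m synth_ai.cli train") -> list[str]:
        # Hypothetical resolution order: env override wins, else the lazy default.
        cmd = os.environ.get(TRAIN_COMMAND_ENV) or default
        return shlex.split(cmd)

    print(resolve_train_cmd())  # ['python', '-m', 'synth_ai.cli', 'train']

Splitting with `shlex` rather than `str.split` keeps a venv path containing spaces intact as a single argv element.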
+def _extract_backend_job_id(output: str) -> str | None:
+    """Extract backend job ID from subprocess output.
+
+    Looks for patterns like:
+    - JSON: "job_id": "pl_xxxxx"
+    - Pattern: pl_[a-f0-9]+
+
+    Args:
+        output: Subprocess stdout/stderr output
+
+    Returns:
+        Backend job ID if found, None otherwise
+
+    Raises:
+        AssertionError: If extracted ID doesn't match expected format
+    """
+    if not output:
+        return None
+
+    # Assert output is a string
+    assert isinstance(output, str), f"Expected str, got {type(output).__name__}"
+
+    # Look for job_id in JSON response
+    match = re.search(r'"job_id"\s*:\s*"([^"]+)"', output)
+    if match:
+        job_id = match.group(1)
+        # Validate format
+        assert job_id.startswith("pl_"), f"Extracted job_id doesn't match expected format 'pl_*': {job_id}"
+        assert len(job_id) > 3, f"Extracted job_id too short: {job_id}"
+        return job_id
+
+    # Try pattern pl_xxxxx
+    match = re.search(r'pl_[a-f0-9]+', output)
+    if match:
+        job_id = match.group(0)
+        # Validate format
+        assert job_id.startswith("pl_"), f"Extracted job_id doesn't match expected format 'pl_*': {job_id}"
+        assert len(job_id) > 3, f"Extracted job_id too short: {job_id}"
+        return job_id
+
+    return None
+
+
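The two-pass search in `_extract_backend_job_id` is easy to exercise standalone. A minimal sketch of the same strategy with the assertions dropped — a quoted `job_id` JSON field first, then a bare `pl_<hex>` token; the function name and sample strings are illustrative:

    import re

    def extract_job_id(output: str) -> str | None:
        # Pass 1: a quoted "job_id" field in JSON-ish subprocess output.
        match = re.search(r'"job_id"\s*:\s*"([^"]+)"', output)
        if match:
            return match.group(1)
        # Pass 2: any bare pl_<hex> token in the text.
        match = re.search(r"pl_[a-f0-9]+", output)
        return match.group(0) if match else None

    assert extract_job_id('{"job_id": "pl_abc123"}') == "pl_abc123"
    assert extract_job_id("queued backend job pl_deadbeef") == "pl_deadbeef"
    assert extract_job_id("no identifier here") is None

Note the asymmetry: pass 1 accepts any quoted value and the package relies on the `pl_` assertions afterwards, while pass 2 only matches lowercase hex after `pl_`.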
163
+ def _poll_backend_progress(
164
+ backend_job_id: str,
165
+ status_tracker: ExperimentStatusTracker,
166
+ policy: str | None,
167
+ environment: str | None,
168
+ backend_url: str,
169
+ api_key: str,
170
+ stop_event: threading.Event,
171
+ job_start_time: float | None = None,
172
+ ) -> None:
173
+ """Poll backend API for progress events and update status_json.
174
+
175
+ Polls the backend API endpoint `/prompt-learning/online/jobs/{backend_job_id}/events`
176
+ every 5 seconds to fetch `prompt.learning.progress` events containing rollouts,
177
+ ETA, and best score information. Updates the experiment status_json in real-time.
178
+
179
+ Backend URL Configuration:
180
+ - Default: Production (https://api.usesynth.ai/api)
181
+ - Local: Set EXPERIMENT_QUEUE_LOCAL=true or use --local flag (http://localhost:8000/api)
182
+ - Custom: Set EXPERIMENT_QUEUE_BACKEND_URL env var
183
+
184
+ Args:
185
+ backend_job_id: Backend job ID to poll (e.g., "pl_xxxxx")
186
+ status_tracker: ExperimentStatusTracker instance for updating status_json
187
+ policy: Policy model name (e.g., "gpt-4", "llama-3.1-8b-instant")
188
+ environment: Environment name (e.g., "heartdisease", "hotpotqa")
189
+ backend_url: Backend API base URL (from config.backend_url)
190
+ api_key: API key for authentication (from SYNTH_API_KEY env var)
191
+ stop_event: Threading event to signal when to stop polling
192
+ """
193
+ import logging
194
+ import os
195
+
196
+ import requests
197
+
198
+ # Import BackendJobEvent locally to ensure it's available in this function's scope
199
+ from .api_schemas import BackendJobEvent # noqa: F811
200
+
201
+ # Get logger for this thread (logger from parent thread may not work correctly)
202
+ poller_logger = logging.getLogger(f"synth_ai.cli.local.experiment_queue.poller.{backend_job_id}")
203
+
204
+ # Set log level from environment variable if set (allows --loglevel flag to control verbosity)
205
+ # Use Celery's logger hierarchy instead of creating our own handler to avoid duplicates
206
+ log_level_env = os.getenv("EXPERIMENT_QUEUE_LOG_LEVEL", "INFO").upper()
207
+ try:
208
+ log_level = getattr(logging, log_level_env)
209
+ poller_logger.setLevel(log_level)
210
+ # Don't create handlers - let Celery's logging handle it
211
+ # Just propagate to parent logger (Celery's task logger)
212
+ poller_logger.propagate = True
213
+ except (AttributeError, ValueError):
214
+ # Invalid log level, use default
215
+ pass
216
+
217
+ # Validate inputs with assertions
218
+ assert backend_job_id, "backend_job_id cannot be empty"
219
+ assert backend_job_id.startswith("pl_"), f"Invalid backend_job_id format: expected 'pl_*', got '{backend_job_id}'"
220
+ assert backend_url, "backend_url cannot be empty"
221
+ assert backend_url.startswith(("http://", "https://")), f"Invalid backend_url format: {backend_url}"
222
+ assert api_key, "api_key cannot be empty"
223
+ assert status_tracker is not None, "status_tracker cannot be None"
224
+ assert stop_event is not None, "stop_event cannot be None"
225
+
226
+ url = f"{backend_url.rstrip('/')}/prompt-learning/online/jobs/{backend_job_id}/events"
227
+ headers = {"Authorization": f"Bearer {api_key}"}
228
+ last_seq = 0
229
+ progress_start_time: float | None = None # Track when we first see progress
230
+ consecutive_timeouts = 0 # Track consecutive timeouts for exponential backoff
231
+ base_poll_interval = 5.0 # Base polling interval in seconds
232
+
233
+ # ✅ ADD: Track last progress update time to detect stuck jobs
234
+ last_progress_time: float | None = None
235
+ last_rollouts_completed: int | None = None
236
+ last_progress_seq = 0
237
+ stuck_threshold_seconds = 600.0 # 10 minutes without progress = stuck
238
+
239
+ poller_logger.info("📡 Starting progress poller for backend job %s (URL: %s)", backend_job_id, url)
240
+
241
+ while not stop_event.is_set():
242
+ events_received = 0
243
+ try:
244
+ # Assert URL is valid before making request
245
+ assert url.startswith(("http://", "https://")), f"Invalid URL format: {url}"
246
+
247
+ poller_logger.info("Polling backend API: %s (since_seq: %d)", url, last_seq)
248
+
249
+ try:
250
+ resp = requests.get(
251
+ url,
252
+ headers=headers,
253
+ params={"since_seq": last_seq, "limit": 100},
254
+ timeout=120, # Increased to 120s to handle slow backend/PostgREST responses
255
+ )
256
+ except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
257
+ # ✅ ADD: Detect connection pool exhaustion in poller
258
+ error_str = str(e).lower()
259
+ is_pool_exhausted = (
260
+ "connection" in error_str
261
+ or "timeout" in error_str
262
+ or "refused" in error_str
263
+ )
264
+ if is_pool_exhausted:
265
+ # 🔥 VERY LOUD ERROR MESSAGES FOR CONNECTION POOL ISSUES IN POLLER
266
+ print("=" * 100, flush=True)
267
+ print("🔥🔥🔥 CONNECTION POOL EXHAUSTION DETECTED (POLLER) 🔥🔥🔥", flush=True)
268
+ print("=" * 100, flush=True)
269
+ print(f"Backend Job ID: {backend_job_id}", flush=True)
270
+ print(f"URL: {url}", flush=True)
271
+ print(f"Error: {type(e).__name__}: {str(e)}", flush=True)
272
+ print("=" * 100, flush=True)
273
+ print("⚠️ Cannot fetch events - connection pool may be exhausted!", flush=True)
274
+ print("⚠️ Check DB_POOL_SIZE and DB_MAX_OVERFLOW environment variables", flush=True)
275
+ print("=" * 100, flush=True)
276
+
277
+ poller_logger.error("=" * 100)
278
+ poller_logger.error("🔥🔥🔥 CONNECTION POOL EXHAUSTION DETECTED (POLLER) 🔥🔥🔥")
279
+ poller_logger.error("=" * 100)
280
+ poller_logger.error("Backend Job ID: %s | URL: %s", backend_job_id, url)
281
+ poller_logger.error("Error: %s: %s", type(e).__name__, str(e))
282
+ poller_logger.error("⚠️ Cannot fetch events - connection pool may be exhausted!")
283
+ poller_logger.error("⚠️ Check DB_POOL_SIZE and DB_MAX_OVERFLOW environment variables")
284
+ poller_logger.error("=" * 100)
285
+ raise
286
+
287
+ # Assert we got a response object
288
+ assert resp is not None, "requests.get() returned None"
289
+
290
+ poller_logger.info("API response: status=%d, content_length=%d", resp.status_code, len(resp.content))
291
+
292
+ # Detect connection pool exhaustion in HTTP error responses
293
+ if resp.status_code not in (200, 201):
294
+ body_text = (resp.text or "")[:500].lower()
295
+ is_pool_exhausted = (
296
+ resp.status_code == 503 # Service Unavailable
297
+ or resp.status_code == 429 # Too Many Requests (after long wait)
298
+ or "connection pool" in body_text
299
+ or "too many clients" in body_text
300
+ or "maxclients" in body_text
301
+ or "max clients" in body_text
302
+ or "connection refused" in body_text
303
+ )
304
+
305
+ if is_pool_exhausted:
306
+ # 🔥 VERY LOUD ERROR MESSAGES FOR CONNECTION POOL ISSUES IN POLLER
307
+ print("=" * 100, flush=True)
308
+ print("🔥🔥🔥 CONNECTION POOL EXHAUSTION DETECTED (POLLER HTTP ERROR) 🔥🔥🔥", flush=True)
309
+ print("=" * 100, flush=True)
310
+ print(f"Backend Job ID: {backend_job_id}", flush=True)
311
+ print(f"URL: {url}", flush=True)
312
+ print(f"HTTP Status: {resp.status_code}", flush=True)
313
+ print(f"Response Body: {resp.text[:500]}", flush=True)
314
+ print("=" * 100, flush=True)
315
+ print("⚠️ Cannot fetch events - connection pool may be exhausted!", flush=True)
316
+ print("⚠️ Check DB_POOL_SIZE and DB_MAX_OVERFLOW environment variables", flush=True)
317
+ print("=" * 100, flush=True)
318
+
319
+ poller_logger.error("=" * 100)
320
+ poller_logger.error("🔥🔥🔥 CONNECTION POOL EXHAUSTION DETECTED (POLLER HTTP ERROR) 🔥🔥🔥")
321
+ poller_logger.error("=" * 100)
322
+ poller_logger.error("Backend Job ID: %s | URL: %s | HTTP: %d", backend_job_id, url, resp.status_code)
323
+ poller_logger.error("Response Body: %s", resp.text[:500])
324
+ poller_logger.error("⚠️ Cannot fetch events - connection pool may be exhausted!")
325
+ poller_logger.error("⚠️ Check DB_POOL_SIZE and DB_MAX_OVERFLOW environment variables")
326
+ poller_logger.error("=" * 100)
327
+
328
+ if resp.status_code == 200:
329
+ # Parse and validate API response using Pydantic models
330
+ try:
331
+ raw_data = resp.json()
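+ # Illustrative (assumed) response shape, inferred from the parsing below;
+ # parse_response also accepts a bare list of events:
+ #     {"events": [{"seq": 12, "type": "prompt.learning.progress",
+ #                  "message": "...", "data": {"rollouts_completed": 3, "rollouts_total": 40}}]}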
332
+ # Assert response is not None
333
+ assert raw_data is not None, "API returned None response"
334
+
335
+ # Parse response with validation
336
+ assert isinstance(raw_data, dict | list), (
337
+ f"API response must be dict or list, got {type(raw_data).__name__}: {raw_data}"
338
+ )
339
+
340
+ events_response = BackendEventsResponse.parse_response(raw_data)
341
+ assert isinstance(events_response, BackendEventsResponse), (
342
+ f"parse_response returned wrong type: {type(events_response).__name__}"
343
+ )
344
+ assert isinstance(events_response.events, list), (
345
+ f"events_response.events must be list, got {type(events_response.events).__name__}"
346
+ )
347
+
348
+ events_received = len(events_response.events)
349
+ assert events_received >= 0, (
350
+ f"events_received must be >= 0, got {events_received}"
351
+ )
352
+
353
+ # Process each event
354
+ event_types_seen: dict[str, int] = {}
355
+ for idx, event in enumerate(events_response.events):
356
+ # Assert event is BackendJobEvent instance
357
+ assert isinstance(event, BackendJobEvent), (
358
+ f"Event at index {idx} must be BackendJobEvent, got {type(event).__name__}"
359
+ )
360
+ # Assert event has required fields
361
+ assert event.seq >= 0, f"Invalid seq: {event.seq}"
362
+ assert event.type, f"Event missing type field: {event}"
363
+ assert event.message, f"Event missing message field: {event}"
364
+
365
+ # Track event types for debugging
366
+ event_types_seen[event.type] = event_types_seen.get(event.type, 0) + 1
367
+
368
+ # Check if this is a progress event
369
+ if event.type == "prompt.learning.progress":
370
+ poller_logger.info(
371
+ "Found progress event seq=%d: %s",
372
+ event.seq,
373
+ event.message[:100],
374
+ )
375
+ # Extract progress data with validation
376
+ progress_data = event.get_progress_data()
377
+ if progress_data is None:
378
+ poller_logger.warning(
379
+ "Progress event seq=%d has no parseable data. Event data: %s",
380
+ event.seq,
381
+ event.data,
382
+ )
383
+ continue
384
+
385
+ poller_logger.debug(
386
+ "Progress event seq=%d data: rollouts_completed=%s, rollouts_total=%s, best_score=%s, eta=%s",
387
+ event.seq,
388
+ progress_data.rollouts_completed,
389
+ progress_data.effective_rollouts_total,
390
+ progress_data.effective_best_score,
391
+ progress_data.eta_seconds,
392
+ )
393
+
394
+ # Use effective getters that handle field name variations
395
+ rollouts_completed = progress_data.rollouts_completed
396
+ rollouts_total = progress_data.effective_rollouts_total
397
+ eta_seconds = progress_data.eta_seconds
398
+ # percent_rollouts from backend is 0-1, convert to 0-100 for display
399
+ progress_pct = None
400
+ if progress_data.percent_rollouts is not None:
401
+ progress_pct = progress_data.percent_rollouts * 100.0
402
+ elif progress_data.percent_overall is not None:
403
+ # Fallback to percent_overall if percent_rollouts not available
404
+ progress_pct = progress_data.percent_overall * 100.0
405
+ best_score = progress_data.effective_best_score
406
+
407
+ # Track when we first see progress (for rollouts/min calculation)
408
+ if rollouts_completed is not None and rollouts_completed > 0 and progress_start_time is None:
409
+ progress_start_time = time.time()
410
+
411
+ # Calculate rollouts/min if we have progress and timing info
412
+ rollouts_per_minute = None
413
+ if rollouts_completed is not None and rollouts_completed > 0:
414
+ # Use progress_start_time if available, otherwise fall back to job_start_time
415
+ start_time_for_rate = progress_start_time or job_start_time
416
+ if start_time_for_rate is not None:
417
+ elapsed = time.time() - start_time_for_rate
418
+ if elapsed > 0:
419
+ rate_per_second = rollouts_completed / elapsed
420
+ rollouts_per_minute = rate_per_second * 60.0
421
+
422
+ # Assert data types and ranges
423
+ if rollouts_completed is not None:
424
+ assert isinstance(rollouts_completed, int), (
425
+ f"rollouts_completed must be int, got {type(rollouts_completed).__name__}: {rollouts_completed}"
426
+ )
427
+ assert rollouts_completed >= 0, (
428
+ f"rollouts_completed must be >= 0, got {rollouts_completed}"
429
+ )
430
+
431
+ if rollouts_total is not None:
432
+ assert isinstance(rollouts_total, int), (
433
+ f"rollouts_total must be int, got {type(rollouts_total).__name__}: {rollouts_total}"
434
+ )
435
+ assert rollouts_total > 0, (
436
+ f"rollouts_total must be > 0, got {rollouts_total}"
437
+ )
438
+
439
+ if eta_seconds is not None:
440
+ assert isinstance(eta_seconds, int | float), (
441
+ f"eta_seconds must be int | float, got {type(eta_seconds).__name__}: {eta_seconds}"
442
+ )
443
+ assert eta_seconds >= 0, (
444
+ f"eta_seconds must be >= 0, got {eta_seconds}"
445
+ )
446
+
447
+ if best_score is not None:
448
+ assert isinstance(best_score, int | float), (
449
+ f"best_score must be int | float, got {type(best_score).__name__}: {best_score}"
450
+ )
451
+ assert 0 <= best_score <= 1, (
452
+ f"best_score must be in [0, 1], got {best_score}"
453
+ )
454
+
455
+ if progress_pct is not None:
456
+ assert isinstance(progress_pct, int | float), (
457
+ f"progress_pct must be int | float, got {type(progress_pct).__name__}: {progress_pct}"
458
+ )
459
+ assert 0 <= progress_pct <= 100, (
460
+ f"progress_pct must be in [0, 100], got {progress_pct}"
461
+ )
462
+
463
+ # Assert consistency: rollouts_completed <= rollouts_total
464
+ if rollouts_completed is not None and rollouts_total is not None:
465
+ assert rollouts_completed <= rollouts_total, (
466
+ f"rollouts_completed ({rollouts_completed}) > rollouts_total ({rollouts_total})"
467
+ )
468
+
469
+ # Assert we have meaningful progress data
470
+ has_progress = (
471
+ rollouts_completed is not None
472
+ or best_score is not None
473
+ or rollouts_total is not None
474
+ )
475
+
476
+ # Initialize custom_fields before use (extracted from event data for validation-phase tracking)
477
+ custom_fields: dict[str, Any] = {}
478
+ if event.data and isinstance(event.data, dict):
479
+ # Extract phase and validation info if present
480
+ phase = event.data.get("phase")
481
+ if phase == "validation":
482
+ custom_fields["phase"] = "validation"
483
+ if "validation_candidate" in event.data:
484
+ custom_fields["validation_candidate"] = event.data["validation_candidate"]
485
+ if "validation_total" in event.data:
486
+ custom_fields["validation_total"] = event.data["validation_total"]
487
+
488
+ if has_progress:
489
+ # Validate status_tracker before update
490
+ assert status_tracker is not None, "status_tracker is None"
491
+ assert hasattr(status_tracker, "update"), "status_tracker missing update method"
492
+ assert hasattr(status_tracker, "job_id"), "status_tracker missing job_id"
493
+
494
+ status_tracker.update(
495
+ policy=policy,
496
+ environment=environment,
497
+ rollouts_completed=rollouts_completed,
498
+ total_rollouts=rollouts_total,
499
+ eta_seconds=eta_seconds,
500
+ progress_pct=progress_pct,
501
+ best_score=best_score,
502
+ rollouts_per_minute=rollouts_per_minute,
503
+ custom_fields=custom_fields if custom_fields else None,
504
+ )
505
+
506
+ # Track progress for stuck detection (reuses the module-level time import)
+ current_time = time.time()
509
+ if rollouts_completed is not None:
510
+ if last_rollouts_completed is None or rollouts_completed != last_rollouts_completed:
511
+ # Progress changed - update tracking
512
+ last_progress_time = current_time
513
+ last_rollouts_completed = rollouts_completed
514
+ last_progress_seq = event.seq
515
+ poller_logger.info(
516
+ "📊 Progress update for job %s: %s/%s rollouts, ETA: %s, Best: %s",
517
+ backend_job_id,
518
+ rollouts_completed,
519
+ rollouts_total,
520
+ eta_seconds,
521
+ best_score,
522
+ )
523
+ elif last_progress_time is not None:
524
+ # Check if stuck (no progress for threshold time)
525
+ time_since_progress = current_time - last_progress_time
526
+ if time_since_progress >= stuck_threshold_seconds:
527
+ poller_logger.warning(
528
+ "⚠️ Job %s appears STUCK: No progress for %.1f minutes (last: %s/%s rollouts at seq %d)",
529
+ backend_job_id,
530
+ time_since_progress / 60.0,
531
+ last_rollouts_completed,
532
+ rollouts_total,
533
+ last_progress_seq,
534
+ )
535
+ # Emit warning event
536
+ with contextlib.suppress(Exception):
537
+ status_tracker.update(
538
+ custom_fields={
539
+ **(custom_fields or {}),
540
+ "stuck_warning": True,
541
+ "time_since_progress_seconds": time_since_progress,
542
+ }
543
+ )
544
+ else:
545
+ # No rollouts info - log anyway
546
+ poller_logger.info(
547
+ "📊 Progress update for job %s: %s/%s rollouts, ETA: %s, Best: %s",
548
+ backend_job_id,
549
+ rollouts_completed,
550
+ rollouts_total,
551
+ eta_seconds,
552
+ best_score,
553
+ )
554
+
555
+ # Update last_seq (always update, even if no progress data)
556
+ last_seq = max(last_seq, event.seq)
557
+ else:
558
+ # Non-progress event - just update seq
559
+ last_seq = max(last_seq, event.seq)
560
+
561
+ # Track consecutive polls with no new events (local counter; there is one poller thread per job)
+ if events_received == 0:
+ consecutive_no_event_polls += 1
+ no_event_count = consecutive_no_event_polls
570
+
571
+ # Warn if we've had many consecutive polls with no events
572
+ if no_event_count >= 12: # 12 polls * 5s = 60s with no events
573
+ poller_logger.warning(
574
+ "⚠️ Job %s: No new events for %d consecutive polls (~%ds). Last seq: %d. Job may be stuck.",
575
+ backend_job_id,
576
+ no_event_count,
577
+ no_event_count * int(base_poll_interval),
578
+ last_seq,
579
+ )
580
+ # Emit warning in status_json
581
+ with contextlib.suppress(Exception):
582
+ status_tracker.update(
583
+ custom_fields={
584
+ "no_event_polls": no_event_count,
585
+ "last_event_seq": last_seq,
586
+ "stuck_warning": True,
587
+ }
588
+ )
589
+
590
+ poller_logger.info("Progress poller heartbeat for job %s (no new events, last_seq=%d, consecutive_no_events=%d)", backend_job_id, last_seq, no_event_count)
591
+ else:
592
+ # Reset counter when we get events
+ consecutive_no_event_polls = 0
595
+
596
+ event_types_str = ", ".join(f"{k}:{v}" for k, v in sorted(event_types_seen.items()))
597
+ poller_logger.info(
598
+ "Processed %d events (types: %s), updated last_seq to %d",
599
+ events_received,
600
+ event_types_str,
601
+ last_seq,
602
+ )
603
+ # Log if we're not seeing progress events
604
+ if "prompt.learning.progress" not in event_types_seen:
605
+ poller_logger.debug(
606
+ "No progress events in this batch (last_seq=%d). Event types seen: %s",
607
+ last_seq,
608
+ event_types_str,
609
+ )
610
+
611
+ # Reset timeout counter on successful request
612
+ consecutive_timeouts = 0
613
+
614
+ except AssertionError as e:
615
+ poller_logger.error(
616
+ "❌ Assertion failed while parsing events for job %s: %s. Response: %s",
617
+ backend_job_id,
618
+ e,
619
+ resp.text[:500] if resp else "No response",
620
+ )
621
+ # Continue polling - don't stop on validation errors
622
+ except ValueError as e:
623
+ poller_logger.error(
624
+ "❌ Invalid API response format for job %s: %s. Response: %s",
625
+ backend_job_id,
626
+ e,
627
+ resp.text[:500] if resp else "No response",
628
+ )
629
+ # Continue polling - don't stop on validation errors
630
+ except Exception as e:
631
+ poller_logger.error(
632
+ "❌ Unexpected error parsing events for job %s: %s. Response: %s",
633
+ backend_job_id,
634
+ e,
635
+ resp.text[:500] if resp else "No response",
636
+ exc_info=True,
637
+ )
638
+ # Continue polling - don't stop on parsing errors
639
+ elif resp.status_code == 404:
640
+ # Job not found yet or doesn't exist - stop polling
641
+ poller_logger.warning("Backend job %s not found (404), stopping poller", backend_job_id)
642
+ break
643
+ else: # any other unexpected status (200 and 404 handled above)
644
+ poller_logger.warning(
645
+ "Backend API returned status %d for job %s: %s",
646
+ resp.status_code,
647
+ backend_job_id,
648
+ resp.text[:200],
649
+ )
650
+ except requests.exceptions.ReadTimeout as e:
651
+ # ReadTimeout is expected when backend is slow - log as warning and use exponential backoff
652
+ consecutive_timeouts += 1
653
+ backoff_seconds = min(base_poll_interval * (2 ** min(consecutive_timeouts - 1, 4)), 60.0) # Max 60s backoff
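+ # Resulting schedule: 5s, 10s, 20s, 40s, then capped at 60s from the fifth consecutive timeout on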
654
+ poller_logger.warning(
655
+ "Backend timeout polling job %s (consecutive=%d, backing off %.1fs): %s",
656
+ backend_job_id,
657
+ consecutive_timeouts,
658
+ backoff_seconds,
659
+ e,
660
+ )
661
+ # Use exponential backoff on timeout
662
+ stop_event.wait(timeout=backoff_seconds)
663
+ continue
664
+ except requests.exceptions.RequestException as e:
665
+ # Other network errors - log as warning, reset timeout counter
666
+ consecutive_timeouts = 0
667
+ poller_logger.warning("Network error polling job %s: %s", backend_job_id, e)
668
+ except Exception as e:
669
+ # Unexpected errors - log as error but don't crash
670
+ consecutive_timeouts = 0
671
+ poller_logger.error("Progress poller error for job %s: %s", backend_job_id, e, exc_info=True)
672
+
673
+ # Poll every 5 seconds (or after backoff)
674
+ stop_event.wait(timeout=base_poll_interval)
675
+
676
+ poller_logger.info("📡 Stopped progress poller for backend job %s", backend_job_id)
677
+
678
+
679
+ def _truncate(text: str, limit: int = 4000) -> str:
680
+ """Truncate text to a maximum length, keeping the end portion.
681
+
682
+ Args:
683
+ text: Text to truncate
684
+ limit: Maximum length in characters (default: 4000)
685
+
686
+ Returns:
687
+ Truncated text (last `limit` characters if text exceeds limit)
688
+ """
689
+ if len(text) <= limit:
690
+ return text
691
+ return text[-limit:]
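+
+ # Illustrative behavior:
+ #     _truncate("abcdef", limit=3)  -> "def"  (keeps the tail, i.e. the newest output)
+ #     _truncate("abc", limit=3)     -> "abc"  (unchanged when within the limit)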
692
+
693
+
694
+ def _build_train_command(config_path: str) -> list[str]:
695
+ """Build the training command for running a prompt learning job.
696
+
697
+ Constructs a command list suitable for subprocess execution by:
698
+ 1. Getting the base command from EXPERIMENT_QUEUE_TRAIN_CMD env var or default
699
+ 2. Parsing the base command into segments
700
+ 3. Appending prompt learning specific flags (--type, --config, --poll, etc.)
701
+ 4. Adding --backend flag with URL from experiment queue config
702
+
703
+ Args:
704
+ config_path: Path to the TOML config file for the experiment
705
+
706
+ Returns:
707
+ List of command segments ready for subprocess execution
708
+
709
+ Note:
710
+ The base command defaults to `python -m synth_ai.cli train` if
711
+ EXPERIMENT_QUEUE_TRAIN_CMD is not set. The command always includes
712
+ --type prompt_learning, --config, --poll, --stream-format cli, and --backend flags.
713
+ """
714
+ # Get command from env var or use default (lazily evaluated)
715
+ base_cmd = os.getenv(TRAIN_COMMAND_ENV)
716
+ if base_cmd:
717
+ logger.debug("Using training command from EXPERIMENT_QUEUE_TRAIN_CMD: %s", base_cmd)
718
+ else:
719
+ base_cmd = _get_default_train_cmd()
720
+ logger.debug("Using default training command: %s", base_cmd)
721
+
722
+ segments: list[str] = []
723
+ for part in shlex.split(base_cmd):
724
+ if part:
725
+ segments.append(part)
726
+
727
+ # Get backend URL from config and add --backend flag
728
+ config = load_config()
729
+ backend_url = config.backend_url
730
+
731
+ segments.extend(
732
+ [
733
+ "--type",
734
+ "prompt_learning",
735
+ "--config",
736
+ config_path,
737
+ "--backend",
738
+ backend_url,
739
+ "--poll",
740
+ "--stream-format",
741
+ "cli",
742
+ ]
743
+ )
744
+ return segments
745
+
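+ # Illustrative result, assuming EXPERIMENT_QUEUE_TRAIN_CMD is unset so the
+ # default base command "python -m synth_ai.cli train" applies; <backend_url>
+ # stands in for whatever load_config() returns:
+ #     _build_train_command("run.toml")
+ #     -> ["python", "-m", "synth_ai.cli", "train", "--type", "prompt_learning",
+ #         "--config", "run.toml", "--backend", "<backend_url>", "--poll",
+ #         "--stream-format", "cli"]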
746
+
747
+ def _mark_job_running(job_id: str, task_id: str | None) -> ExperimentJob | None:
748
+ """Mark a job as running and update its status in the database.
749
+
750
+ Updates the job status to RUNNING, sets the started_at timestamp, and
751
+ optionally associates a Celery task ID. If the parent experiment is
752
+ QUEUED, it is also marked as RUNNING.
753
+
754
+ Args:
755
+ job_id: Job identifier
756
+ task_id: Optional Celery task ID to associate with the job
757
+
758
+ Returns:
759
+ ExperimentJob instance if found, None otherwise
760
+
761
+ Note:
762
+ The job is expunged from the session so it can be safely used outside
763
+ the session scope. The session is committed automatically by session_scope.
764
+ """
765
+ with session_scope() as session:
766
+ job = session.get(ExperimentJob, job_id)
767
+ if not job:
768
+ logger.warning("Job %s missing from database", job_id)
769
+ return None
770
+ job.status = ExperimentJobStatus.RUNNING
771
+ job.started_at = datetime.now(UTC)
772
+ if task_id:
773
+ job.celery_task_id = task_id
774
+ experiment = job.experiment
775
+ if experiment and experiment.status == ExperimentStatus.QUEUED:
776
+ experiment.status = ExperimentStatus.RUNNING
777
+ experiment.started_at = datetime.now(UTC)
778
+ session.flush()
779
+ # Expunge so job can be safely used outside session scope
780
+ session.expunge(job)
781
+ return job
782
+
783
+
784
+ def _jobs_remaining(session, experiment_id: str) -> int:
785
+ """Count remaining jobs (QUEUED or RUNNING) for an experiment.
786
+
787
+ Args:
788
+ session: SQLAlchemy session
789
+ experiment_id: Experiment identifier
790
+
791
+ Returns:
792
+ Number of jobs that are still QUEUED or RUNNING (not completed/failed)
793
+ """
794
+ return (
795
+ session.query(ExperimentJob)
796
+ .filter(
797
+ ExperimentJob.experiment_id == experiment_id,
798
+ ExperimentJob.status.in_(
799
+ [
800
+ ExperimentJobStatus.QUEUED,
801
+ ExperimentJobStatus.RUNNING,
802
+ ]
803
+ ),
804
+ )
805
+ .count()
806
+ )
807
+
808
+
809
+ def _finalize_job(
810
+ job_id: str,
811
+ *,
812
+ summary: ResultSummary,
813
+ success: bool,
814
+ error_message: str | None = None,
815
+ command: str | None = None,
816
+ working_directory: str | None = None,
817
+ python_executable: str | None = None,
818
+ environment_keys: list[str] | None = None,
819
+ ) -> dict[str, Any] | None:
820
+ """Finalize a job by updating its status and persisting results.
821
+
822
+ Updates the job status to COMPLETED or FAILED based on success flag,
823
+ persists trial data if successful, and updates experiment status when
824
+ all jobs are done. If the experiment has remaining jobs, dispatches them.
825
+
826
+ Args:
827
+ job_id: Job identifier
828
+ summary: Result summary containing stdout, stderr, metrics, etc.
829
+ success: Whether the job completed successfully
830
+ error_message: Optional error message if the job failed
+ command: Optional command string that was executed (stored in the execution log)
+ working_directory: Optional working directory of the subprocess
+ python_executable: Optional Python executable path, recorded for debugging
+ environment_keys: Optional list of environment variable names present at execution
831
+
832
+ Returns:
833
+ Summary dictionary if job found, None otherwise
834
+
835
+ Note:
836
+ - If successful: Job status set to COMPLETED, trials persisted
837
+ - If failed: Job status set to FAILED, error message stored
838
+ - Experiment status updated to COMPLETED/FAILED only when all jobs done
839
+ - Remaining jobs are dispatched if experiment still has queued jobs
840
+ """
841
+ with session_scope() as session:
842
+ job = session.get(ExperimentJob, job_id)
843
+ if not job:
844
+ logger.warning("Job %s missing during finalize", job_id)
845
+ return None
846
+
847
+ job.completed_at = datetime.now(UTC)
848
+ experiment = job.experiment
849
+
850
+ # ALWAYS create execution log entry (for both success and failure)
851
+ # This allows querying failures directly from the database
852
+ if command is not None and working_directory is not None:
853
+ from uuid import uuid4
854
+ # For failed jobs, store full stdout/stderr (up to 100k chars each)
855
+ # For successful jobs, truncate to 4k chars to save space
856
+ stdout_for_log = summary.stdout or ""
857
+ stderr_for_log = summary.stderr or ""
858
+ if not success:
859
+ # Keep full output for errors (truncate only if extremely large)
860
+ if len(stdout_for_log) > 100000:
861
+ stdout_for_log = f"{stdout_for_log[:50000]}\n\n... (truncated {len(stdout_for_log) - 100000} chars) ...\n\n{stdout_for_log[-50000:]}"
862
+ if len(stderr_for_log) > 100000:
863
+ stderr_for_log = f"{stderr_for_log[:50000]}\n\n... (truncated {len(stderr_for_log) - 100000} chars) ...\n\n{stderr_for_log[-50000:]}"
864
+ else:
865
+ # Truncate successful job output to save space
866
+ stdout_for_log = _truncate(stdout_for_log)
867
+ stderr_for_log = _truncate(stderr_for_log)
868
+
869
+ execution_log = JobExecutionLog(
870
+ log_id=f"log_{uuid4().hex[:12]}",
871
+ job_id=job_id,
872
+ command=command,
873
+ working_directory=working_directory,
874
+ returncode=summary.returncode,
875
+ stdout=stdout_for_log,
876
+ stderr=stderr_for_log,
877
+ python_executable=python_executable,
878
+ environment_keys=environment_keys,
879
+ )
880
+ session.add(execution_log)
881
+ logger.info(
882
+ "Created execution log for job %s: returncode=%d, stdout_len=%d (stored: %d), stderr_len=%d (stored: %d)%s",
883
+ job_id,
884
+ summary.returncode,
885
+ len(summary.stdout or ""),
886
+ len(stdout_for_log),
887
+ len(summary.stderr or ""),
888
+ len(stderr_for_log),
889
+ " [FULL ERROR STORED]" if not success else "",
890
+ )
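+ # Illustrative failure query (a sketch; assumes the same SQLAlchemy session
+ # helpers used elsewhere in this module):
+ #     with session_scope() as s:
+ #         failed = s.query(JobExecutionLog).filter(JobExecutionLog.returncode != 0).all()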
891
+
892
+ if success:
893
+ # Only set job.result for successful jobs to prevent stale data from previous runs
894
+ job.result = summary.to_dict()
895
+ job.status = ExperimentJobStatus.COMPLETED
896
+ persist_trials_from_summary(session, job, summary)
897
+ if experiment:
898
+ update_experiment_metadata(experiment, summary)
899
+
900
+ # Update status_json with final stats from backend job metadata
901
+ if job.backend_job_id:
902
+ try:
903
+ import requests
904
+
905
+ from .service import update_job_status
906
+
907
+ # Fetch backend job metadata
908
+ config = load_config()
909
+ backend_url = config.backend_url
910
+ # Load API key from .env - fail loudly if not found
911
+ try:
912
+ api_key = _load_synth_api_key()
913
+ except RuntimeError as e:
914
+ logger.error(str(e))
915
+ raise
916
+
917
+ if backend_url and api_key:
918
+ url = f"{backend_url.rstrip('/')}/prompt-learning/online/jobs/{job.backend_job_id}"
919
+ headers = {"Authorization": f"Bearer {api_key}"}
920
+ resp = requests.get(url, headers=headers, timeout=60.0) # Increased from 10s to 60s to handle backend overload
921
+
922
+ if resp.status_code == 200:
923
+ backend_job = resp.json()
924
+ backend_metadata = backend_job.get("metadata", {})
925
+ backend_stats = backend_metadata.get("stats", {})
926
+
927
+ if backend_stats:
928
+ # Update status_json with final stats (including scores for result extraction)
929
+ status_update = {
930
+ "trials_tried": backend_stats.get("trials_tried"),
931
+ "total_tokens": backend_stats.get("total_tokens"),
932
+ "total_rollouts": backend_stats.get("total_rollouts"),
933
+ "optimization_rollouts_executed": backend_stats.get("optimization_rollouts_executed"),
934
+ "validation_rollouts_executed": backend_stats.get("validation_rollouts_executed"),
935
+ "optimization_trials_evaluated": backend_stats.get("optimization_trials_evaluated"),
936
+ "validation_trials_evaluated": backend_stats.get("validation_trials_evaluated"),
937
+ # CRITICAL: Store scores for result extraction (if backend job returns 404 later)
938
+ "baseline_score": backend_stats.get("baseline_score"),
939
+ "best_score": backend_stats.get("best_score") or backend_stats.get("best_validation_score"),
940
+ "total_time_seconds": backend_stats.get("total_time_seconds"),
941
+ "eval_seeds_n": backend_stats.get("eval_seeds_n"),
942
+ "transformations_evaluated": backend_stats.get("transformations_evaluated"),
943
+ }
944
+ # Remove None values
945
+ status_update = {k: v for k, v in status_update.items() if v is not None}
946
+ # Fail loudly if the backend returned no usable stats (the AssertionError is caught and logged below)
947
+ assert len(status_update) > 0, f"status_update must not be empty for job {job_id}"
948
+ if status_update:
949
+ update_job_status(job_id, status_update)
950
+ logger.info(
951
+ "Updated status_json with final stats for job %s: %s",
952
+ job_id,
953
+ status_update,
954
+ )
955
+ except Exception as e:
956
+ # Log but don't fail job finalization if stats update fails
957
+ logger.warning(
958
+ "Failed to update status_json with final stats for job %s: %s",
959
+ job_id,
960
+ e,
961
+ )
962
+ else:
963
+ # Job failed - clear job.result to prevent stale data from previous successful runs
964
+ job.result = None
965
+ job.status = ExperimentJobStatus.FAILED
966
+ # Store full error message (truncate to 100k chars max to avoid DB issues, but keep full context)
967
+ full_error = error_message or summary.stderr or "Job failed"
968
+ if len(full_error) > 100000:
969
+ # Keep first 50k and last 50k chars
970
+ full_error = f"{full_error[:50000]}\n\n... (truncated {len(full_error) - 100000} chars) ...\n\n{full_error[-50000:]}"
971
+ job.error = full_error
972
+ if experiment:
973
+ # Don't immediately mark experiment as failed - let remaining jobs continue
974
+ # The experiment will be marked as failed only if all jobs fail
975
+ logger.warning(
976
+ "Job %s failed for experiment %s, but allowing remaining jobs to continue",
977
+ job_id,
978
+ experiment.experiment_id,
979
+ )
980
+
981
+ session.flush()
982
+
983
+ if experiment:
984
+ remaining = _jobs_remaining(session, experiment.experiment_id)
985
+ if remaining == 0:
986
+ # All jobs completed - check if experiment succeeded or failed
987
+ all_jobs = (
988
+ session.query(ExperimentJob)
989
+ .filter(ExperimentJob.experiment_id == experiment.experiment_id)
990
+ .all()
991
+ )
992
+ all_failed = all(
993
+ job.status == ExperimentJobStatus.FAILED for job in all_jobs
994
+ )
995
+ if all_failed:
996
+ experiment.status = ExperimentStatus.FAILED
997
+ experiment.error = (
998
+ all_jobs[0].error if all_jobs else "All jobs failed"
999
+ )
1000
+ else:
1001
+ experiment.status = ExperimentStatus.COMPLETED
1002
+ experiment.completed_at = datetime.now(UTC)
1003
+ else:
1004
+ # Dispatch remaining jobs (periodic task will also handle this as backup)
1005
+ dispatch_available_jobs(session, experiment.experiment_id)
1006
+
1007
+ return summary.to_dict()
1008
+
1009
+
1010
+ @celery_app.task(bind=True, name="synth_ai.cli.local.experiment_queue.run_experiment_job")
1011
+ def run_experiment_job(self, job_id: str) -> dict[str, Any] | None:
1012
+ """Celery task entrypoint for running a prompt learning experiment job.
1013
+
1014
+ This is the main Celery task that executes prompt learning jobs. It:
1015
+ 1. Marks the job as RUNNING
1016
+ 2. Prepares the config file (applies overrides)
1017
+ 3. Builds and executes the training command via subprocess
1018
+ 4. Collects results (stdout, stderr, metrics, artifacts)
1019
+ 5. Finalizes the job (updates status, persists results)
1020
+
1021
+ Args:
1022
+ self: Celery task instance (bound task)
1023
+ job_id: Job identifier from the experiment queue database
1024
+
1025
+ Returns:
1026
+ Result summary dictionary if successful, None if job not found
1027
+
1028
+ Raises:
1029
+ AssertionError: If inputs are invalid (should not happen in production)
1030
+
1031
+ Note:
1032
+ The task runs the training command (`synth-ai train --type prompt_learning`)
1033
+ as a subprocess and captures stdout/stderr. Health check failures and
1034
+ authentication errors are detected and cause job failure even if returncode is 0.
1035
+ """
1036
+ # Validate input
1037
+ assert isinstance(job_id, str), (
1038
+ f"job_id must be str, got {type(job_id).__name__}: {job_id}"
1039
+ )
1040
+ assert job_id, "job_id cannot be empty"
1041
+
1042
+ job = _mark_job_running(job_id, getattr(self.request, "id", None))
1043
+ if not job:
1044
+ logger.warning("Job %s not found or could not be marked as running", job_id)
1045
+ return None
1046
+
1047
+ # Validate job object
1048
+ assert isinstance(job, ExperimentJob), (
1049
+ f"_mark_job_running must return ExperimentJob, got {type(job).__name__}"
1050
+ )
1051
+ assert job.job_id == job_id, (
1052
+ f"Job ID mismatch: expected {job_id}, got {job.job_id}"
1053
+ )
1054
+ assert job.status == ExperimentJobStatus.RUNNING, (
1055
+ f"Job status must be RUNNING, got {job.status}"
1056
+ )
1057
+
1058
+ summary = ResultSummary()
1059
+ prepared: PreparedConfig | None = None
1060
+ success = False
1061
+ error_message: str | None = None # Will be set if training fails
1062
+ cmd: list[str] | None = None # Store command for execution logging
1063
+ env: dict[str, str] | None = None # Store environment for execution logging
1064
+
1065
+ # Initialize status tracker
1066
+ assert job.job_id, "job.job_id cannot be empty"
1067
+ status_tracker = ExperimentStatusTracker(job.job_id)
1068
+ assert status_tracker.job_id == job.job_id, (
1069
+ f"Status tracker job_id mismatch: expected {job.job_id}, got {status_tracker.job_id}"
1070
+ )
1071
+
1072
+ job_start_time = time.time()
1073
+ assert job_start_time > 0, f"job_start_time must be > 0, got {job_start_time}"
1074
+
1075
+ policy: str | None = None
1076
+ environment: str | None = None
1077
+
1078
+ try:
1079
+ # Validate config_path
1080
+ assert job.config_path, "job.config_path cannot be empty"
1081
+ assert isinstance(job.config_path, str), (
1082
+ f"job.config_path must be str, got {type(job.config_path).__name__}"
1083
+ )
1084
+
1085
+ # Validate config_overrides
1086
+ if job.config_overrides is not None:
1087
+ assert isinstance(job.config_overrides, dict), (
1088
+ f"job.config_overrides must be dict, got {type(job.config_overrides).__name__}"
1089
+ )
1090
+
1091
+ prepared = prepare_config_file(job.config_path, job.config_overrides or {})
1092
+ assert prepared is not None, "prepare_config_file returned None"
1093
+ assert isinstance(prepared, PreparedConfig), (
1094
+ f"prepare_config_file must return PreparedConfig, got {type(prepared).__name__}"
1095
+ )
1096
+ assert prepared.path.exists(), (
1097
+ f"Prepared config file must exist: {prepared.path}"
1098
+ )
1099
+
1100
+ # Extract policy and environment from config
1101
+ policy, environment = extract_config_info(prepared.path)
1102
+ assert isinstance(policy, str | type(None)), (
1103
+ f"policy must be str | None, got {type(policy).__name__}: {policy}"
1104
+ )
1105
+ assert isinstance(environment, str | type(None)), (
1106
+ f"environment must be str | None, got {type(environment).__name__}: {environment}"
1107
+ )
1108
+
1109
+ # Extract model/provider from override FIRST (override takes precedence)
1110
+ model_override = None
1111
+ provider_override = None
1112
+ if job.config_overrides:
1113
+ model_override = job.config_overrides.get("prompt_learning.policy.model")
1114
+ provider_override = job.config_overrides.get("prompt_learning.policy.provider")
1115
+
1116
+ # Use override if available, otherwise use extracted
1117
+ final_model = model_override or policy
1118
+ final_provider = provider_override
1119
+
1120
+ # ASSERT: Verify overrides were applied by checking the prepared config
1121
+ if job.config_overrides:
1122
+ rollout_budget_override = job.config_overrides.get("prompt_learning.gepa.rollout.budget")
1123
+ max_rollouts_override = job.config_overrides.get("prompt_learning.termination_config.max_rollouts")
1124
+
1125
+ # Assert model override matches extracted policy
1126
+ if model_override:
1127
+ assert policy == model_override, (
1128
+ f"CRITICAL: Policy model mismatch for job {job.job_id}: "
1129
+ f"override={model_override!r} but extracted={policy!r}. "
1130
+ f"This indicates the override wasn't applied correctly to the prepared config. "
1131
+ f"Config path: {prepared.path}"
1132
+ )
1133
+ logger.info(
1134
+ "✅ Config override verified for job %s: model=%s matches extracted policy",
1135
+ job.job_id,
1136
+ model_override,
1137
+ )
1138
+
1139
+ # Re-read the prepared config once (instead of once per override) and verify the remaining overrides against it
+ if provider_override or rollout_budget_override is not None or max_rollouts_override is not None:
+ import tomllib
+ with open(prepared.path, "rb") as f:
+ prepared_config = tomllib.load(f)
+ pl_section = prepared_config.get("prompt_learning", {})
+
+ # Assert provider override if specified
+ if provider_override:
+ policy_section = pl_section.get("policy", {})
+ extracted_provider = policy_section.get("provider") if isinstance(policy_section, dict) else None
+ if extracted_provider:
+ assert extracted_provider == provider_override, (
+ f"CRITICAL: Provider mismatch for job {job.job_id}: "
+ f"override={provider_override!r} but extracted={extracted_provider!r}. "
+ f"Config path: {prepared.path}"
+ )
+
+ # Assert rollout budget override if specified
+ if rollout_budget_override is not None:
+ gepa_section = pl_section.get("gepa", {})
+ rollout_section = gepa_section.get("rollout", {}) if isinstance(gepa_section, dict) else {}
+ extracted_budget = rollout_section.get("budget") if isinstance(rollout_section, dict) else None
+ if extracted_budget is not None:
+ assert extracted_budget == rollout_budget_override, (
+ f"CRITICAL: Rollout budget mismatch for job {job.job_id}: "
+ f"override={rollout_budget_override} but extracted={extracted_budget}. "
+ f"Config path: {prepared.path}"
+ )
+
+ # Assert max_rollouts override if specified
+ if max_rollouts_override is not None:
+ termination_section = pl_section.get("termination_config", {})
+ extracted_max_rollouts = termination_section.get("max_rollouts") if isinstance(termination_section, dict) else None
+ if extracted_max_rollouts is not None:
+ assert extracted_max_rollouts == max_rollouts_override, (
+ f"CRITICAL: Max rollouts mismatch for job {job.job_id}: "
+ f"override={max_rollouts_override} but extracted={extracted_max_rollouts}. "
+ f"Config path: {prepared.path}"
+ )
1185
+
1186
+ if final_model or environment:
1187
+ # Build policy string with provider if available
1188
+ policy_str = f"{final_provider}/{final_model}" if final_provider and final_model else final_model
1189
+ status_tracker.update(policy=policy_str, environment=environment)
1190
+ logger.info(
1191
+ "📊 Experiment config for job %s: policy=%s, environment=%s",
1192
+ job.job_id,
1193
+ policy or "unknown",
1194
+ environment or "unknown",
1195
+ )
1196
+
1197
+ cmd = _build_train_command(str(prepared.path))
1198
+ assert isinstance(cmd, list), (
1199
+ f"_build_train_command must return list, got {type(cmd).__name__}"
1200
+ )
1201
+ # Store cmd for execution logging (needed at end of function)
1202
+ assert len(cmd) > 0, "Command list cannot be empty"
1203
+ assert all(isinstance(arg, str) for arg in cmd), (
1204
+ f"All command arguments must be str, got types: {[type(arg).__name__ for arg in cmd]}"
1205
+ )
1206
+ logger.info("Executing job %s via command: %s", job.job_id, " ".join(cmd))
1207
+
1208
+ # Run command with unbuffered output to see errors immediately
1209
+ env = os.environ.copy()
1210
+ assert isinstance(env, dict), (
1211
+ f"os.environ.copy() must return dict, got {type(env).__name__}"
1212
+ )
1213
+ env["PYTHONUNBUFFERED"] = "1"
1214
+
1215
+ # Log authentication status BEFORE running command
1216
+ synth_key = env.get("SYNTH_API_KEY")
1217
+ env_key = env.get("ENVIRONMENT_API_KEY")
1218
+ logger.info(
1219
+ "🔐 Authentication status for job %s:\n"
1220
+ " SYNTH_API_KEY: %s\n"
1221
+ " ENVIRONMENT_API_KEY: %s",
1222
+ job.job_id,
1223
+ f"{synth_key[:8]}...{synth_key[-4:]}" if synth_key and len(synth_key) > 12 else "(NOT SET)",
1224
+ f"{env_key[:8]}...{env_key[-4:]}" if env_key and len(env_key) > 12 else "(NOT SET)",
1225
+ )
1226
+
1227
+ logger.info(
1228
+ "🚀 Starting subprocess for job %s:\n"
1229
+ " Command: %s\n"
1230
+ " Working directory: %s\n"
1231
+ " Python executable: %s\n"
1232
+ " Environment keys: %s",
1233
+ job.job_id,
1234
+ " ".join(cmd),
1235
+ os.getcwd(),
1236
+ env.get("PYTHON", "python"),
1237
+ ", ".join(sorted([k for k in env if "API" in k or "KEY" in k])),
1238
+ )
1239
+
1240
+ # Get backend URL and API key for progress polling
1241
+ config = load_config()
1242
+ assert config is not None, "load_config() returned None"
1243
+ backend_url = config.backend_url
1244
+ assert isinstance(backend_url, str), (
1245
+ f"config.backend_url must be str, got {type(backend_url).__name__}"
1246
+ )
1247
+ assert backend_url.startswith(("http://", "https://")), (
1248
+ f"backend_url must start with http:// or https://, got {backend_url}"
1249
+ )
1250
+
1251
+ # Get API key from .env file - fail loudly if not found
1252
+ # This is needed for the poller thread, which runs in the worker process
1253
+ try:
1254
+ api_key = _load_synth_api_key()
1255
+ except RuntimeError as e:
1256
+ logger.error(str(e))
1257
+ raise
1258
+
1259
+ # Start background progress poller (will be started once we have backend_job_id)
1260
+ poller_stop = threading.Event()
1261
+ assert poller_stop is not None, "threading.Event() returned None"
1262
+ poller_thread: threading.Thread | None = None
1263
+ backend_job_id: str | None = None
1264
+
1265
+ try:
1266
+ # Stream subprocess output line-by-line to extract backend_job_id and parse progress
1267
+ process = subprocess.Popen(
1268
+ cmd,
1269
+ stdout=subprocess.PIPE,
1270
+ stderr=subprocess.STDOUT,
1271
+ text=True,
1272
+ env=env,
1273
+ bufsize=1, # Line buffered
1274
+ )
1275
+ assert process is not None, "subprocess.Popen() returned None"
1276
+ assert process.stdout is not None, "process.stdout is None"
1277
+
1278
+ stdout_lines: list[str] = []
1279
+ accumulated_output = "" # Accumulate output for better pattern matching
1280
+ last_status_update_time = job_start_time
1281
+ status_update_interval = 5.0 # Update status_json every 5 seconds even without progress
1282
+ assert status_update_interval > 0, (
1283
+ f"status_update_interval must be > 0, got {status_update_interval}"
1284
+ )
1285
+
1286
+ # Read output line-by-line with timeout protection
1287
+ # If subprocess crashes immediately, we need to ensure we capture the error
1288
+ try:
1289
+ # Read output line-by-line
1290
+ for line in process.stdout:
1291
+ assert isinstance(line, str), (
1292
+ f"process.stdout line must be str, got {type(line).__name__}"
1293
+ )
1294
+ stdout_lines.append(line)
1295
+ assert isinstance(accumulated_output, str), (
1296
+ f"accumulated_output must be str, got {type(accumulated_output).__name__}"
1297
+ )
1298
+ accumulated_output += line
1299
+ assert len(accumulated_output) >= len(line), (
1300
+ f"accumulated_output length should increase, got {len(accumulated_output)} < {len(line)}"
1301
+ )
1302
+
1303
+ # Try to extract backend_job_id from output
1304
+ if not backend_job_id:
1305
+ extracted_id = _extract_backend_job_id(line)
1306
+ if extracted_id:
1307
+ # Assert extracted ID is valid before using it
1308
+ assert extracted_id.startswith("pl_"), (
1309
+ f"Invalid backend_job_id format: {extracted_id}"
1310
+ )
1311
+ assert len(extracted_id) > 3, (
1312
+ f"Backend job ID too short: {extracted_id}"
1313
+ )
1314
+
1315
+ backend_job_id = extracted_id
1316
+ logger.info("📋 Extracted backend job ID: %s", backend_job_id)
1317
+
1318
+ # Store backend_job_id in status_json for debugging
1319
+ status_tracker.update(custom_fields={"backend_job_id": backend_job_id})
1320
+ logger.info("📋 Stored backend_job_id in status_json for job %s", job.job_id)
1321
+
1322
+ # Update job with backend_job_id
1323
+ with session_scope() as session:
1324
+ db_job = session.get(ExperimentJob, job.job_id)
1325
+ if db_job:
1326
+ db_job.backend_job_id = backend_job_id
1327
+ session.commit()
1328
+
1329
+ # Start progress poller now that we have backend_job_id
1330
+ # API key should already be loaded and validated above
1331
+ if not api_key:
1332
+ raise RuntimeError(
1333
+ f"❌ SYNTH_API_KEY not available for job {job.job_id}. "
1334
+ "This should have been caught earlier - API key loading failed."
1335
+ )
1336
+ elif not backend_url:
1337
+ logger.warning(
1338
+ "⚠️ Cannot start progress poller for job %s: backend_url not configured. "
1339
+ "Progress updates will not be available, but job will continue.",
1340
+ job.job_id,
1341
+ )
1342
+ elif backend_job_id and not backend_job_id.startswith("pl_"):
1343
+ logger.warning(
1344
+ "⚠️ Cannot start progress poller for job %s: invalid backend_job_id format: %s. "
1345
+ "Progress updates will not be available, but job will continue.",
1346
+ job.job_id,
1347
+ backend_job_id,
1348
+ )
1349
+
1350
+ if api_key and backend_url and backend_job_id and backend_job_id.startswith("pl_"):
1351
+ # Validate all inputs before starting thread
1352
+ assert isinstance(backend_job_id, str), (
1353
+ f"backend_job_id must be str, got {type(backend_job_id).__name__}"
1354
+ )
1355
+ assert isinstance(status_tracker, ExperimentStatusTracker), (
1356
+ f"status_tracker must be ExperimentStatusTracker, got {type(status_tracker).__name__}"
1357
+ )
1358
+ assert isinstance(backend_url, str), (
1359
+ f"backend_url must be str, got {type(backend_url).__name__}"
1360
+ )
1361
+ assert isinstance(api_key, str), (
1362
+ f"api_key must be str, got {type(api_key).__name__}"
1363
+ )
1364
+ assert poller_stop is not None, "poller_stop cannot be None"
1365
+
1366
+ poller_thread = threading.Thread(
1367
+ target=_poll_backend_progress,
1368
+ args=(
1369
+ backend_job_id,
1370
+ status_tracker,
1371
+ policy,
1372
+ environment,
1373
+ backend_url,
1374
+ api_key,
1375
+ poller_stop,
1376
+ job_start_time, # Pass job start time for rollouts/min calculation
1377
+ ),
1378
+ daemon=True,
1379
+ )
1380
+ assert poller_thread is not None, "threading.Thread() returned None"
1381
+ poller_thread.start()
1382
1385
+ logger.info("📡 Started progress poller for backend job %s", backend_job_id)
1386
+ else:
1387
+ logger.warning(
1388
+ "Cannot start progress poller: missing API key or backend URL"
1389
+ )
1390
+
1391
+ # Parse accumulated output for progress updates (fallback if API polling fails)
1392
+ # Use accumulated output (not just current line) for better pattern matching
1393
+ # Update status_json periodically even without progress data to show elapsed time
1394
+ current_time = time.time()
1395
+ assert current_time >= job_start_time, (
1396
+ f"current_time ({current_time}) < job_start_time ({job_start_time})"
1397
+ )
1398
+ assert isinstance(accumulated_output, str), (
1399
+ f"accumulated_output must be str, got {type(accumulated_output).__name__}"
1400
+ )
1401
+
1402
+ should_update = (
1403
+ # Update if we find progress patterns
1404
+ "rollouts=" in line.lower() or
1405
+ "progress:" in line.lower() or
1406
+ "gepa progress:" in line.lower() or
1407
+ # Or update periodically (every 5 seconds) to show elapsed time
1408
+ (current_time - last_status_update_time) >= status_update_interval
1409
+ )
1410
+ assert isinstance(should_update, bool), (
1411
+ f"should_update must be bool, got {type(should_update).__name__}"
1412
+ )
1413
+
1414
+ if should_update:
1415
+ # Validate accumulated_output before parsing
1416
+ assert len(accumulated_output) > 0, "accumulated_output cannot be empty"
1417
+ output_to_parse = accumulated_output[-5000:] # Last 5KB to avoid parsing huge outputs
1418
+ assert isinstance(output_to_parse, str), (
1419
+ f"output_to_parse must be str, got {type(output_to_parse).__name__}"
1420
+ )
1421
+ assert len(output_to_parse) <= len(accumulated_output), (
1422
+ f"output_to_parse length ({len(output_to_parse)}) > accumulated_output length ({len(accumulated_output)})"
1423
+ )
1424
+
1425
+ update_status_from_output(
1426
+ status_tracker,
1427
+ output_to_parse,
1428
+ policy=policy,
1429
+ environment=environment,
1430
+ start_time=job_start_time,
1431
+ )
1432
+ last_status_update_time = current_time
1433
+ assert last_status_update_time >= job_start_time, (
1434
+ f"last_status_update_time ({last_status_update_time}) < job_start_time ({job_start_time})"
1435
+ )
1436
+ except (BrokenPipeError, OSError) as e:
1437
+ # Subprocess may have crashed - log and continue to wait() to get returncode
1438
+ logger.warning(
1439
+ "Error reading subprocess stdout for job %s (process may have crashed): %s",
1440
+ job.job_id,
1441
+ e,
1442
+ )
1443
+ # Continue to process.wait() to get the returncode and any buffered output
1444
+
1445
+ # Wait for process to complete (ALWAYS wait, even if stdout reading failed)
1446
+ assert process is not None, "process is None before wait()"
1447
+ returncode = process.wait()
1448
+
1449
+ # If stdout reading failed but process exited, try to read any remaining buffered output
1450
+ if process.stdout and not stdout_lines:
1451
+ try:
1452
+ remaining_output = process.stdout.read()
1453
+ if remaining_output:
1454
+ stdout_lines.append(remaining_output)
1455
+ accumulated_output += remaining_output
1456
+ logger.info(
1457
+ "Captured remaining subprocess output for job %s after process exit: %d bytes",
1458
+ job.job_id,
1459
+ len(remaining_output),
1460
+ )
1461
+ except Exception as e:
1462
+ logger.warning(
1463
+ "Failed to read remaining subprocess output for job %s: %s",
1464
+ job.job_id,
1465
+ e,
1466
+ )
1467
+ assert isinstance(returncode, int), (
1468
+ f"process.wait() must return int, got {type(returncode).__name__}: {returncode}"
1469
+ )
1470
+
1471
+ # Combine output
1472
+ assert isinstance(stdout_lines, list), (
1473
+ f"stdout_lines must be list, got {type(stdout_lines).__name__}"
1474
+ )
1475
+ assert all(isinstance(line, str) for line in stdout_lines), (
1476
+ f"All stdout_lines must be str, got types: {[type(line).__name__ for line in stdout_lines[:5]]}"
1477
+ )
1478
+
1479
+ stdout = "".join(stdout_lines)
1480
+ assert isinstance(stdout, str), (
1481
+ f"stdout must be str, got {type(stdout).__name__}"
1482
+ )
1483
+ stderr = "" # stderr is redirected to stdout
1484
+ assert isinstance(stderr, str), (
1485
+ f"stderr must be str, got {type(stderr).__name__}"
1486
+ )
1487
+
1488
+ # CRITICAL: If subprocess failed but we have no output, log a warning
1489
+ # This indicates the subprocess crashed before producing any output
1490
+ if returncode != 0 and not stdout:
1491
+ logger.error(
1492
+ "❌ Subprocess for job %s exited with code %d but produced NO output. "
1493
+ "This usually indicates an immediate crash (import error, syntax error, etc.). "
1494
+ "Command: %s",
1495
+ job.job_id,
1496
+ returncode,
1497
+ " ".join(cmd),
1498
+ )
1499
+ # Set a helpful error message
1500
+ stdout = (
1501
+ f"[ERROR] Subprocess crashed immediately with exit code {returncode}. "
1502
+ f"No output captured. This usually indicates:\n"
1503
+ f" 1. Import error (missing module)\n"
1504
+ f" 2. Syntax error in Python code\n"
1505
+ f" 3. Missing executable or PATH issue\n"
1506
+ f" 4. Permission error\n"
1507
+ f"\nCommand: {' '.join(cmd)}\n"
1508
+ f"Working directory: {os.getcwd()}\n"
1509
+ f"Python: {env.get('PYTHON', 'python')}"
1510
+ )
1511
+
1512
+ # Create CompletedProcess-like object for compatibility
1513
+ class CompletedProcess:
1514
+ def __init__(self, returncode: int, stdout: str, stderr: str):
1515
+ assert isinstance(returncode, int), (
1516
+ f"returncode must be int, got {type(returncode).__name__}"
1517
+ )
1518
+ assert isinstance(stdout, str), (
1519
+ f"stdout must be str, got {type(stdout).__name__}"
1520
+ )
1521
+ assert isinstance(stderr, str), (
1522
+ f"stderr must be str, got {type(stderr).__name__}"
1523
+ )
1524
+ self.returncode = returncode
1525
+ self.stdout = stdout
1526
+ self.stderr = stderr
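+ # Design note: the standard library's subprocess.CompletedProcess(args, returncode,
+ # stdout, stderr) could serve the same role; a local class is used here only to
+ # keep the extra type assertions on construction.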
1527
+
1528
+ completed = CompletedProcess(returncode, stdout, stderr)
1529
+ assert isinstance(completed, CompletedProcess), (
1530
+ f"CompletedProcess() must return CompletedProcess, got {type(completed).__name__}"
1531
+ )
1532
+
1533
+ logger.info(
1534
+ "✅ Subprocess completed for job %s:\n"
1535
+ " Return code: %s\n"
1536
+ " Stdout length: %d chars\n"
1537
+ " Stderr length: %d chars",
1538
+ job.job_id,
1539
+ completed.returncode,
1540
+ len(completed.stdout) if completed.stdout else 0,
1541
+ len(completed.stderr) if completed.stderr else 0,
1542
+ )
1543
+
1544
+ # Final status update from complete output
1545
+ assert isinstance(completed.stdout, str), (
1546
+ f"completed.stdout must be str before final update, got {type(completed.stdout).__name__}"
1547
+ )
1548
+ assert len(completed.stdout) > 0 or len(accumulated_output) > 0, (
1549
+ "Must have some output for final status update"
1550
+ )
1551
+
1552
+ # Use accumulated_output if available (more complete), otherwise stdout
1553
+ final_output = accumulated_output if accumulated_output else completed.stdout
1554
+ assert isinstance(final_output, str), (
1555
+ f"final_output must be str, got {type(final_output).__name__}"
1556
+ )
1557
+
1558
+ update_status_from_output(
1559
+ status_tracker,
1560
+ final_output,
1561
+ policy=policy,
1562
+ environment=environment,
1563
+ start_time=job_start_time,
1564
+ )
1565
+ except subprocess.TimeoutExpired as e:
1566
+ logger.error("⏱️ Subprocess TIMEOUT for job %s after %s seconds", job.job_id, e.timeout)
1567
+ raise
1568
+ except Exception as e:
1569
+ logger.error(
1570
+ "❌ Subprocess EXCEPTION for job %s:\n"
1571
+ " Type: %s\n"
1572
+ " Message: %s",
1573
+ job.job_id,
1574
+ type(e).__name__,
1575
+ str(e),
1576
+ exc_info=True,
1577
+ )
1578
+ raise
1579
+ finally:
1580
+ # Stop progress poller
1581
+ if poller_thread and poller_thread.is_alive():
1582
+ poller_stop.set()
1583
+ poller_thread.join(timeout=5)
1584
+ logger.info("📡 Stopped progress poller for job %s", job.job_id)
1585
+
1586
+ # Log full output for debugging - prioritize auth errors
1587
+ logger.info("Training command returncode: %s", completed.returncode)
1588
+
1589
+ # Check for critical errors FIRST - these should cause failure even if returncode is 0
1590
+ stdout_lower = (completed.stdout or "").lower()
1591
+ stderr_lower = (completed.stderr or "").lower()
1592
+ combined_output = (completed.stdout or "") + "\n" + (completed.stderr or "")
1593
+ combined_lower = combined_output.lower()
1594
+
1595
+ # Check for health check failures (common cause of silent failures)
1596
+ health_check_failures = []
1597
+ health_check_details = []
1598
+ if "health check failed" in combined_lower or "aborting due to failing health check" in combined_lower:
1599
+ # Extract full context around health check failure - look for error patterns
1600
+ for source_name, source_text in [("STDOUT", completed.stdout), ("STDERR", completed.stderr)]:
1601
+ if not source_text:
1602
+ continue
1603
+ source_lower = source_text.lower()
1604
+ if "health check" in source_lower:
1605
+ # Find health check failure message
1606
+ idx = source_lower.find("health check")
1607
+ start = max(0, idx - 200)
1608
+ end = min(len(source_text), idx + 500)
1609
+ health_check_failures.append(f"{source_name} (health check context):\n{source_text[start:end]}")
1610
+
1611
+ # Also look for error patterns that might explain WHY it failed
1612
+ # Look for HTTP status codes, error messages, exceptions
1613
+ if "500" in source_text or "internal server error" in source_lower:
1614
+ # Find the 500 error context
1615
+ error_idx = source_lower.find("500") if "500" in source_text else source_lower.find("internal server error")
1616
+ if error_idx >= 0:
1617
+ error_start = max(0, error_idx - 100)
1618
+ error_end = min(len(source_text), error_idx + 800)
1619
+ health_check_details.append(f"{source_name} (500 error details):\n{source_text[error_start:error_end]}")
1620
+
1621
+ # Look for tracebacks or exception messages
1622
+ if "traceback" in source_lower or "exception" in source_lower or "error:" in source_lower:
1623
+ # Find traceback/exception
1624
+ tb_idx = source_lower.find("traceback") if "traceback" in source_lower else (
1625
+ source_lower.find("exception") if "exception" in source_lower else source_lower.find("error:")
1626
+ )
1627
+ if tb_idx >= 0:
1628
+ tb_start = max(0, tb_idx - 50)
1629
+ tb_end = min(len(source_text), tb_idx + 1500) # Get more context for tracebacks
1630
+ health_check_details.append(f"{source_name} (exception/traceback):\n{source_text[tb_start:tb_end]}")
1631
+
1632
+ # Look for specific error messages like "ModuleNotFoundError", "RuntimeError", etc.
1633
+ error_patterns = [
1634
+ r"(ModuleNotFoundError|ImportError|RuntimeError|ValueError|KeyError|AttributeError)[^\n]*",
1635
+ r"Failed to [^\n]+",
1636
+ r"Unable to [^\n]+",
1637
+ r"Missing [^\n]+",
1638
+ ]
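+ # e.g. a line like "ModuleNotFoundError: No module named 'foo'" (illustrative)
+ # would be captured with ~100 chars of leading and ~300 chars of trailing context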
+                for pattern in error_patterns:
+                    matches = re.finditer(pattern, source_text, re.IGNORECASE | re.MULTILINE)
+                    for match in matches:
+                        match_start = max(0, match.start() - 100)
+                        match_end = min(len(source_text), match.end() + 300)
+                        health_check_details.append(f"{source_name} (error pattern '{pattern[:30]}...'):\n{source_text[match_start:match_end]}")
+
+        if health_check_failures:
+            success = False
+            # Build an informative error message
+            error_parts = [
+                "Training command failed health check. The task app endpoint returned an error.",
+            ]
+            if health_check_details:
+                error_parts.append("See details below for the root cause.")
+            else:
+                error_parts.append("Check the task app logs and ensure the /task_info endpoint is working.")
+
+            error_message = " ".join(error_parts)
+
+            logger.error(
+                "🚨 HEALTH CHECK FAILURE for job %s:\n%s",
+                job.job_id,
+                "\n".join(health_check_failures),
+            )
+
+            if health_check_details:
+                logger.error(
+                    "🔍 ROOT CAUSE ANALYSIS for job %s:\n%s",
+                    job.job_id,
+                    "\n" + "=" * 80 + "\n" + "\n".join(health_check_details) + "\n" + "=" * 80,
+                )
+
+        # Check for authentication-related errors
+        auth_keywords = [
+            "authentication",
+            "authorization",
+            "api key",
+            "api_key",
+            "missing api",
+            "invalid api",
+            "unauthorized",
+            "forbidden",
+            "401",
+            "403",
+            "missing",
+            "not set",
+            "required",
+        ]
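+        # Illustration (hypothetical lines, not from a real run): this scan would flag output
+        # such as "401 Unauthorized" or "API key not set", along with up to ~300 chars of context.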
+
+        auth_errors = []
+        for keyword in auth_keywords:
+            if keyword in stdout_lower:
+                # Extract context around the keyword
+                idx = stdout_lower.find(keyword)
+                start = max(0, idx - 100)
+                end = min(len(completed.stdout), idx + 200)
+                auth_errors.append(f"STDOUT: ...{completed.stdout[start:end]}...")
+            if keyword in stderr_lower:
+                idx = stderr_lower.find(keyword)
+                start = max(0, idx - 100)
+                end = min(len(completed.stderr), idx + 200)
+                auth_errors.append(f"STDERR: ...{completed.stderr[start:end]}...")
+
+        if auth_errors:
+            logger.error(
+                "🚨 AUTHENTICATION ERRORS DETECTED for job %s:\n%s",
+                job.job_id,
+                "\n".join(auth_errors),
+            )
+
+        # Log full output (especially important for errors)
+        if completed.stdout:
+            if not success:
+                # For errors, log full output
+                logger.error("Training command stdout (FULL, %d chars):\n%s", len(completed.stdout), completed.stdout)
+            else:
+                # For success, log last 2000 chars
+                logger.info("Training command stdout (last 2000 chars):\n%s", completed.stdout[-2000:])
+        else:
+            logger.warning("Training command stdout is EMPTY - command may have exited before producing output")
+
+        if completed.stderr:
+            if not success:
+                # For errors, log full output
+                logger.error("Training command stderr (FULL, %d chars):\n%s", len(completed.stderr), completed.stderr)
+            else:
+                # For success, log last 2000 chars
+                logger.warning("Training command stderr (last 2000 chars):\n%s", completed.stderr[-2000:])
+        else:
+            logger.info("Training command stderr is empty")
+        # Validate inputs before collecting results
+        assert prepared is not None, "prepared cannot be None"
+        assert isinstance(prepared, PreparedConfig), (
+            f"prepared must be PreparedConfig, got {type(prepared).__name__}"
+        )
+        assert isinstance(prepared.results_folder, Path), (
+            f"prepared.results_folder must be Path, got {type(prepared.results_folder).__name__}"
+        )
+        assert isinstance(completed.stdout, str), (
+            f"completed.stdout must be str, got {type(completed.stdout).__name__}"
+        )
+        assert isinstance(completed.stderr, str), (
+            f"completed.stderr must be str, got {type(completed.stderr).__name__}"
+        )
+
+        artifact_summary = collect_result_summary(
+            prepared.results_folder,
+            stdout=completed.stdout,
+            stderr=completed.stderr,
+        )
+        assert isinstance(artifact_summary, ResultSummary), (
+            f"collect_result_summary must return ResultSummary, got {type(artifact_summary).__name__}"
+        )
+
+        artifact_summary.stdout = _truncate(completed.stdout)
+        assert isinstance(artifact_summary.stdout, str), (
+            f"artifact_summary.stdout must be str after truncate, got {type(artifact_summary.stdout).__name__}"
+        )
+        artifact_summary.stderr = _truncate(completed.stderr)
+        assert isinstance(artifact_summary.stderr, str), (
+            f"artifact_summary.stderr must be str after truncate, got {type(artifact_summary.stderr).__name__}"
+        )
+        artifact_summary.returncode = completed.returncode
+        assert isinstance(artifact_summary.returncode, int), (
+            f"artifact_summary.returncode must be int, got {type(artifact_summary.returncode).__name__}"
+        )
+        summary = artifact_summary
+        assert isinstance(summary, ResultSummary), (
+            f"summary must be ResultSummary, got {type(summary).__name__}"
+        )
+
+        # ✅ FIX: If summary.total_rollouts is None, try to fetch from backend metadata stats
+        # This handles cases where CLI output parsing fails but backend has accurate stats
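+        # Assumed backend payload shape consulted below (illustrative only; the field names
+        # are taken from the parsing code, and the real response may carry more fields):
+        #   {"metadata": {"stats": {"total_rollouts": 120,
+        #                           "optimization_rollouts_executed": 100,
+        #                           "validation_rollouts_executed": 20}}}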
+        if summary.total_rollouts is None and backend_job_id:
+            try:
+                import requests
+
+                config = load_config()
+                backend_url = config.backend_url
+                try:
+                    api_key = _load_synth_api_key()
+                except RuntimeError:
+                    api_key = None
+
+                if backend_url and api_key:
+                    url = f"{backend_url.rstrip('/')}/prompt-learning/online/jobs/{backend_job_id}"
+                    headers = {"Authorization": f"Bearer {api_key}"}
+                    resp = requests.get(url, headers=headers, timeout=10.0)
+
+                    if resp.status_code == 200:
+                        backend_job = resp.json()
+                        backend_metadata = backend_job.get("metadata", {})
+                        backend_stats = backend_metadata.get("stats", {})
+
+                        # Try to get total_rollouts from backend stats.
+                        # Prefer total_rollouts; fall back to the sum of optimization and validation rollouts.
+                        backend_total_rollouts = backend_stats.get("total_rollouts")
+                        if backend_total_rollouts is None:
+                            opt_rollouts = backend_stats.get("optimization_rollouts_executed", 0) or 0
+                            val_rollouts = backend_stats.get("validation_rollouts_executed", 0) or 0
+                            if opt_rollouts > 0 or val_rollouts > 0:
+                                backend_total_rollouts = opt_rollouts + val_rollouts
+
+                        if backend_total_rollouts is not None and backend_total_rollouts > 0:
+                            summary.total_rollouts = backend_total_rollouts
+                            logger.info(
+                                "✅ Extracted total_rollouts=%d from backend metadata stats for job %s (backend_job_id=%s)",
+                                backend_total_rollouts,
+                                job.job_id,
+                                backend_job_id,
+                            )
+            except Exception as e:
+                # Log but don't fail - the backend fetch is a best-effort fallback
+                logger.debug(
+                    "Could not fetch backend stats to extract rollouts for job %s: %s",
+                    job.job_id,
+                    e,
+                )
+
+        # Check if training actually ran - for prompt learning (GEPA/MIPRO), we expect results.
+        # Note: success may have been set to False above if the health check failed.
+        if not error_message:  # Only check returncode if we haven't already detected a failure
+            success = completed.returncode == 0
+        if success and job.job_type == "gepa":
+            # GEPA should produce rollouts - that's the primary indicator of success.
+            # If returncode is 0 but no rollouts were produced, it failed silently.
+            if summary.total_rollouts is None or summary.total_rollouts == 0:
+                success = False
+                error_message = (
+                    "Training command exited with returncode 0 but produced no rollouts. "
+                    "This indicates GEPA did not actually run. "
+                    "Check stdout/stderr for errors. "
+                    f"Results folder: {prepared.results_folder}"
+                )
+                logger.error(
+                    "Job %s failed silently: %s\nStdout tail:\n%s\nStderr tail:\n%s",
+                    job.job_id,
+                    error_message,
+                    summary.stdout[-1000:] if summary.stdout else "(empty)",
+                    summary.stderr[-1000:] if summary.stderr else "(empty)",
+                )
+            else:
+                # We have rollouts - that's sufficient evidence GEPA ran successfully.
+                # The learning curve and stats are nice-to-have but not required.
+                logger.info(
+                    "Job %s completed successfully with %d rollouts (best_score=%s, learning_curve_points=%d, stats=%s)",
+                    job.job_id,
+                    summary.total_rollouts,
+                    summary.best_score,
+                    len(summary.learning_curve_points),
+                    "yes" if summary.stats else "no",
+                )
+
+        if not success and not error_message:
+            # Build a detailed error message with the FULL stdout/stderr
+            error_parts = [f"Training command exited with {completed.returncode}"]
+
+            # Include the FULL stdout if available (for errors, we want complete context)
+            if completed.stdout:
+                error_parts.append(f"\n\n{'=' * 80}\nSTDOUT (FULL, {len(completed.stdout)} chars):\n{'=' * 80}\n{completed.stdout}")
+            else:
+                error_parts.append("\n\nStdout: (empty - subprocess may have crashed immediately)")
+
+            # Include the FULL stderr if available
+            if completed.stderr:
+                error_parts.append(f"\n\n{'=' * 80}\nSTDERR (FULL, {len(completed.stderr)} chars):\n{'=' * 80}\n{completed.stderr}")
+            else:
+                error_parts.append("\n\nStderr: (empty)")
+
+            error_message = "".join(error_parts)
+
+            # Log the full error (the complete stdout/stderr is also kept in error_message)
+            logger.error(
+                "Job %s failed: %s\nFull stdout (%d chars):\n%s\nFull stderr (%d chars):\n%s",
+                job.job_id,
+                f"Training command exited with {completed.returncode}",
+                len(completed.stdout) if completed.stdout else 0,
+                completed.stdout if completed.stdout else "(empty)",
+                len(completed.stderr) if completed.stderr else 0,
+                completed.stderr if completed.stderr else "(empty)",
+            )
+    except Exception as exc:
+        error_message = str(exc)
+        summary.stderr = _truncate((summary.stderr or "") + f"\n{error_message}")
+        logger.exception("Job %s encountered an error: %s", job.job_id, error_message)
+    finally:
+        if prepared:
+            prepared.cleanup()
+
+    # Prepare execution details for logging
+    command_str = " ".join(cmd) if cmd else None
+    working_dir = os.getcwd()
+    if env is not None:
+        python_exe = env.get("PYTHON", "python")
+        env_keys = list(env.keys())
+    else:
+        python_exe = None
+        env_keys = None
+
+    return _finalize_job(
+        job.job_id,
+        summary=summary,
+        success=success,
+        error_message=error_message,
+        command=command_str,
+        working_directory=working_dir,
+        python_executable=python_exe,
+        environment_keys=env_keys,
+    )
+
+
+@celery_app.task(name="synth_ai.cli.local.experiment_queue.process_experiment_queue")
+def process_experiment_queue() -> dict[str, Any]:
+    """Periodic task that checks for queued jobs and dispatches them.
+
+    This task runs every 5 seconds (via Celery Beat) to ensure queued jobs
+    are dispatched even if:
+    - Previous dispatch attempts failed
+    - Jobs were queued while other jobs were running
+    - The worker restarted and missed dispatch events
+
+    Returns a summary of dispatched jobs.
+    """
+    # Verify we're using the correct database
+    from .config import load_config
+    config = load_config()
+    env_db_path = os.getenv("EXPERIMENT_QUEUE_DB_PATH")
+    if env_db_path:
+        from pathlib import Path
+        env_db_path_resolved = Path(env_db_path).expanduser().resolve()
+        if config.sqlite_path != env_db_path_resolved:
+            logger.error(
+                "Database path mismatch in periodic task! ENV: %s != CONFIG: %s",
+                env_db_path_resolved,
+                config.sqlite_path,
+            )
+
+    logger.debug("Processing experiment queue for queued jobs (database: %s)", config.sqlite_path)
+    dispatched_count = 0
+    experiments_checked = 0
+
+    with session_scope() as session:
+        # Find all running or queued experiments that might have jobs to dispatch
+        active_experiments = (
+            session.query(Experiment)
+            .filter(
+                Experiment.status.in_([ExperimentStatus.QUEUED, ExperimentStatus.RUNNING])
+            )
+            .all()
+        )
+
+        for experiment in active_experiments:
+            experiments_checked += 1
+            # Check if there are any queued jobs without a celery_task_id
+            queued_jobs = (
+                session.query(ExperimentJob)
+                .filter(
+                    ExperimentJob.experiment_id == experiment.experiment_id,
+                    ExperimentJob.status == ExperimentJobStatus.QUEUED,
+                    ExperimentJob.celery_task_id.is_(None),
+                )
+                .count()
+            )
+
+            if queued_jobs > 0:
+                logger.debug(
+                    "Found %d queued jobs for experiment %s, attempting dispatch",
+                    queued_jobs,
+                    experiment.experiment_id,
+                )
+                dispatched = dispatch_available_jobs(session, experiment.experiment_id)
+                dispatched_count += len(dispatched)
+                if dispatched:
+                    logger.info(
+                        "Dispatched %d jobs for experiment %s",
+                        len(dispatched),
+                        experiment.experiment_id,
+                    )
+
+    result = {
+        "dispatched": dispatched_count,
+        "experiments_checked": experiments_checked,
+    }
+    logger.debug("Queue check completed: %s", result)
+    return result
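+
+
+# A minimal sketch of the Celery Beat entry that would drive the periodic task above
+# every 5 seconds, assuming celery_app is the app configured in this module. The schedule
+# key name ("process-experiment-queue") is illustrative and not taken from this diff;
+# only the task name matches the @celery_app.task registration above.
+celery_app.conf.beat_schedule = {
+    "process-experiment-queue": {
+        "task": "synth_ai.cli.local.experiment_queue.process_experiment_queue",
+        "schedule": 5.0,  # seconds between queue sweeps
+    },
+}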