synth-ai 0.2.14-py3-none-any.whl → 0.4.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic.

Files changed (1086)
  1. synth_ai/__init__.py +25 -46
  2. synth_ai/__main__.py +30 -3
  3. synth_ai/cli/__init__.py +98 -72
  4. synth_ai/cli/__main__.py +42 -0
  5. synth_ai/cli/_internal/__init__.py +5 -0
  6. synth_ai/cli/_internal/modal_wrapper.py +31 -0
  7. synth_ai/cli/_internal/storage.py +20 -0
  8. synth_ai/cli/_internal/typer_patch.py +47 -0
  9. synth_ai/cli/_internal/validate_task_app.py +29 -0
  10. synth_ai/cli/agents/__init__.py +17 -0
  11. synth_ai/cli/agents/claude.py +77 -0
  12. synth_ai/cli/agents/codex.py +265 -0
  13. synth_ai/cli/agents/opencode.py +253 -0
  14. synth_ai/cli/commands/__init__.py +18 -0
  15. synth_ai/cli/commands/artifacts/__init__.py +13 -0
  16. synth_ai/cli/commands/artifacts/client.py +119 -0
  17. synth_ai/cli/commands/artifacts/config.py +57 -0
  18. synth_ai/cli/commands/artifacts/core.py +24 -0
  19. synth_ai/cli/commands/artifacts/download.py +188 -0
  20. synth_ai/cli/commands/artifacts/export.py +186 -0
  21. synth_ai/cli/commands/artifacts/list.py +156 -0
  22. synth_ai/cli/commands/artifacts/parsing.py +250 -0
  23. synth_ai/cli/commands/artifacts/show.py +336 -0
  24. synth_ai/cli/commands/demo/__init__.py +3 -0
  25. synth_ai/cli/commands/demo/core.py +153 -0
  26. synth_ai/cli/commands/eval/__init__.py +10 -0
  27. synth_ai/cli/commands/eval/config.py +338 -0
  28. synth_ai/cli/commands/eval/core.py +258 -0
  29. synth_ai/cli/commands/eval/runner.py +704 -0
  30. synth_ai/cli/commands/eval/validation.py +60 -0
  31. synth_ai/cli/commands/filter/__init__.py +12 -0
  32. synth_ai/cli/commands/filter/core.py +424 -0
  33. synth_ai/cli/commands/filter/errors.py +55 -0
  34. synth_ai/cli/commands/filter/validation.py +77 -0
  35. synth_ai/cli/commands/help/__init__.py +185 -0
  36. synth_ai/cli/commands/help/core.py +72 -0
  37. synth_ai/cli/commands/scan/__init__.py +19 -0
  38. synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
  39. synth_ai/cli/commands/scan/core.py +344 -0
  40. synth_ai/cli/commands/scan/health_checker.py +242 -0
  41. synth_ai/cli/commands/scan/local_scanner.py +278 -0
  42. synth_ai/cli/commands/scan/models.py +83 -0
  43. synth_ai/cli/commands/smoke/__init__.py +7 -0
  44. synth_ai/cli/commands/smoke/core.py +1428 -0
  45. synth_ai/cli/commands/status/__init__.py +3 -0
  46. synth_ai/cli/commands/status/client.py +91 -0
  47. synth_ai/cli/commands/status/config.py +12 -0
  48. synth_ai/cli/commands/status/errors.py +11 -0
  49. synth_ai/cli/commands/status/subcommands/__init__.py +3 -0
  50. synth_ai/cli/commands/status/subcommands/config.py +13 -0
  51. synth_ai/cli/commands/status/subcommands/files.py +34 -0
  52. synth_ai/cli/commands/status/subcommands/jobs.py +51 -0
  53. synth_ai/cli/commands/status/subcommands/models.py +35 -0
  54. synth_ai/cli/commands/status/subcommands/runs.py +34 -0
  55. synth_ai/cli/commands/status/subcommands/session.py +77 -0
  56. synth_ai/cli/commands/status/subcommands/summary.py +39 -0
  57. synth_ai/cli/commands/status/subcommands/utils.py +41 -0
  58. synth_ai/cli/commands/status/utils.py +23 -0
  59. synth_ai/cli/commands/train/__init__.py +51 -0
  60. synth_ai/cli/commands/train/core.py +22 -0
  61. synth_ai/cli/commands/train/errors.py +117 -0
  62. synth_ai/cli/commands/train/prompt_learning_validation.py +632 -0
  63. synth_ai/cli/commands/train/validation.py +392 -0
  64. synth_ai/cli/commands/train/verifier_schemas.py +200 -0
  65. synth_ai/cli/commands/train/verifier_validation.py +235 -0
  66. synth_ai/cli/demo_apps/__init__.py +10 -0
  67. synth_ai/cli/demo_apps/core/__init__.py +28 -0
  68. synth_ai/cli/demo_apps/core/cli.py +1735 -0
  69. synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
  70. synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
  71. synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
  72. synth_ai/cli/demo_apps/demo_registry.py +176 -0
  73. synth_ai/cli/demo_apps/demo_task_apps/core.py +440 -0
  74. synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
  75. synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
  76. synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +73 -0
  77. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +738 -0
  78. synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
  79. synth_ai/cli/demo_apps/math/__init__.py +1 -0
  80. synth_ai/cli/demo_apps/math/_common.py +16 -0
  81. synth_ai/cli/demo_apps/math/app.py +38 -0
  82. synth_ai/cli/demo_apps/math/config.toml +75 -0
  83. synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
  84. synth_ai/cli/demo_apps/math/modal_task_app.py +698 -0
  85. synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
  86. synth_ai/cli/demo_apps/mipro/main.py +271 -0
  87. synth_ai/cli/demo_apps/mipro/task_app.py +911 -0
  88. synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
  89. synth_ai/cli/demos/__init__.py +12 -0
  90. synth_ai/cli/demos/demo.py +32 -0
  91. synth_ai/cli/demos/rl_demo.py +254 -0
  92. synth_ai/cli/deploy.py +216 -0
  93. synth_ai/cli/infra/__init__.py +14 -0
  94. synth_ai/cli/infra/balance.py +216 -0
  95. synth_ai/cli/infra/mcp.py +35 -0
  96. synth_ai/cli/infra/modal_app.py +36 -0
  97. synth_ai/cli/infra/setup.py +69 -0
  98. synth_ai/cli/infra/status.py +16 -0
  99. synth_ai/cli/infra/turso.py +77 -0
  100. synth_ai/cli/lib/__init__.py +10 -0
  101. synth_ai/cli/lib/agents.py +76 -0
  102. synth_ai/cli/lib/apps/modal_app.py +101 -0
  103. synth_ai/cli/lib/apps/task_app.py +642 -0
  104. synth_ai/cli/lib/bin.py +39 -0
  105. synth_ai/cli/lib/env.py +375 -0
  106. synth_ai/cli/lib/errors.py +85 -0
  107. synth_ai/cli/lib/modal.py +315 -0
  108. synth_ai/cli/lib/plotting.py +126 -0
  109. synth_ai/cli/lib/prompt_args.py +39 -0
  110. synth_ai/cli/lib/prompts.py +284 -0
  111. synth_ai/cli/lib/sqld.py +122 -0
  112. synth_ai/cli/lib/task_app_discovery.py +884 -0
  113. synth_ai/cli/lib/task_app_env.py +295 -0
  114. synth_ai/cli/lib/train_cfgs.py +300 -0
  115. synth_ai/cli/lib/tunnel_records.py +207 -0
  116. synth_ai/cli/local/__init__.py +14 -0
  117. synth_ai/cli/local/experiment_queue/__init__.py +72 -0
  118. synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
  119. synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
  120. synth_ai/cli/local/experiment_queue/config.py +128 -0
  121. synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
  122. synth_ai/cli/local/experiment_queue/database.py +175 -0
  123. synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
  124. synth_ai/cli/local/experiment_queue/models.py +231 -0
  125. synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
  126. synth_ai/cli/local/experiment_queue/results.py +373 -0
  127. synth_ai/cli/local/experiment_queue/schemas.py +131 -0
  128. synth_ai/cli/local/experiment_queue/service.py +344 -0
  129. synth_ai/cli/local/experiment_queue/status.py +372 -0
  130. synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
  131. synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
  132. synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
  133. synth_ai/cli/local/experiment_queue/validation.py +157 -0
  134. synth_ai/cli/local/session/__init__.py +92 -0
  135. synth_ai/cli/local/session/client.py +383 -0
  136. synth_ai/cli/local/session/constants.py +63 -0
  137. synth_ai/cli/local/session/exceptions.py +105 -0
  138. synth_ai/cli/local/session/manager.py +139 -0
  139. synth_ai/cli/local/session/models.py +89 -0
  140. synth_ai/cli/local/session/query.py +110 -0
  141. synth_ai/cli/root.py +30 -6
  142. synth_ai/cli/task_apps/__init__.py +37 -0
  143. synth_ai/cli/task_apps/commands.py +3145 -0
  144. synth_ai/cli/task_apps/deploy.py +7 -0
  145. synth_ai/cli/task_apps/list.py +26 -0
  146. synth_ai/cli/task_apps/main.py +36 -0
  147. synth_ai/cli/task_apps/modal_serve.py +11 -0
  148. synth_ai/cli/task_apps/serve.py +11 -0
  149. synth_ai/cli/training/__init__.py +8 -0
  150. synth_ai/cli/training/train.py +5 -0
  151. synth_ai/cli/training/train_cfg.py +34 -0
  152. synth_ai/cli/training/watch.py +506 -0
  153. synth_ai/cli/turso.py +34 -55
  154. synth_ai/cli/utils/__init__.py +8 -0
  155. synth_ai/cli/utils/experiments.py +235 -0
  156. synth_ai/cli/utils/queue.py +504 -0
  157. synth_ai/cli/utils/recent.py +133 -0
  158. synth_ai/cli/utils/traces.py +164 -0
  159. synth_ai/contracts/__init__.py +67 -0
  160. synth_ai/core/__init__.py +100 -0
  161. synth_ai/core/_utils/__init__.py +54 -0
  162. synth_ai/core/_utils/base_url.py +10 -0
  163. synth_ai/core/_utils/http.py +10 -0
  164. synth_ai/core/_utils/prompts.py +14 -0
  165. synth_ai/core/_utils/task_app_state.py +12 -0
  166. synth_ai/core/_utils/user_config.py +10 -0
  167. synth_ai/core/apps/common.py +116 -0
  168. synth_ai/core/auth.py +95 -0
  169. synth_ai/core/cfgs.py +240 -0
  170. synth_ai/core/config/__init__.py +16 -0
  171. synth_ai/core/config/base.py +168 -0
  172. synth_ai/core/config/resolver.py +89 -0
  173. synth_ai/core/env.py +231 -0
  174. synth_ai/core/errors.py +125 -0
  175. synth_ai/core/http.py +230 -0
  176. synth_ai/core/integrations/__init__.py +11 -0
  177. synth_ai/core/integrations/cloudflare.py +1886 -0
  178. synth_ai/core/integrations/mcp/__init__.py +6 -0
  179. synth_ai/core/integrations/mcp/__main__.py +8 -0
  180. synth_ai/core/integrations/mcp/claude.py +36 -0
  181. synth_ai/core/integrations/mcp/main.py +254 -0
  182. synth_ai/core/integrations/mcp/setup.py +100 -0
  183. synth_ai/core/integrations/modal.py +277 -0
  184. synth_ai/core/json.py +72 -0
  185. synth_ai/core/log_filter.py +99 -0
  186. synth_ai/core/logging.py +82 -0
  187. synth_ai/core/paths.py +107 -0
  188. synth_ai/core/pricing.py +109 -0
  189. synth_ai/core/process.py +233 -0
  190. synth_ai/core/ssl.py +25 -0
  191. synth_ai/core/storage/__init__.py +71 -0
  192. synth_ai/core/task_app_state.py +318 -0
  193. synth_ai/core/telemetry.py +282 -0
  194. synth_ai/core/tracing_v3/__init__.py +99 -0
  195. synth_ai/core/tracing_v3/abstractions.py +348 -0
  196. synth_ai/core/tracing_v3/config.py +229 -0
  197. synth_ai/core/tracing_v3/constants.py +21 -0
  198. synth_ai/core/tracing_v3/db_config.py +182 -0
  199. synth_ai/core/tracing_v3/decorators.py +401 -0
  200. synth_ai/core/tracing_v3/llm_call_record_helpers.py +437 -0
  201. synth_ai/core/tracing_v3/migration_helper.py +119 -0
  202. synth_ai/core/tracing_v3/session_tracer.py +542 -0
  203. synth_ai/core/tracing_v3/storage/base.py +211 -0
  204. synth_ai/core/tracing_v3/storage/config.py +109 -0
  205. synth_ai/core/tracing_v3/storage/factory.py +39 -0
  206. synth_ai/core/tracing_v3/trace_utils.py +326 -0
  207. synth_ai/core/tracing_v3/turso/daemon.py +278 -0
  208. synth_ai/core/tracing_v3/turso/models.py +470 -0
  209. synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
  210. synth_ai/core/tracing_v3/utils.py +108 -0
  211. synth_ai/core/urls.py +18 -0
  212. synth_ai/core/user_config.py +137 -0
  213. synth_ai/core/uvicorn.py +222 -0
  214. synth_ai/data/__init__.py +83 -0
  215. synth_ai/data/enums.py +122 -0
  216. synth_ai/data/rewards.py +249 -0
  217. synth_ai/data/traces.py +35 -0
  218. synth_ai/products/__init__.py +6 -0
  219. synth_ai/products/graph_evolve/__init__.py +45 -0
  220. synth_ai/products/graph_evolve/client.py +226 -0
  221. synth_ai/products/graph_evolve/config.py +591 -0
  222. synth_ai/products/graph_evolve/converters/__init__.py +42 -0
  223. synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
  224. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
  225. synth_ai/products/graph_evolve/run.py +222 -0
  226. synth_ai/products/graph_gepa/__init__.py +23 -0
  227. synth_ai/products/graph_gepa/converters/__init__.py +19 -0
  228. synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
  229. synth_ai/sdk/__init__.py +129 -0
  230. synth_ai/sdk/api/__init__.py +1 -0
  231. synth_ai/sdk/api/eval/__init__.py +33 -0
  232. synth_ai/sdk/api/eval/job.py +732 -0
  233. synth_ai/sdk/api/models/supported.py +514 -0
  234. synth_ai/sdk/api/research_agent/__init__.py +296 -0
  235. synth_ai/sdk/api/train/__init__.py +85 -0
  236. synth_ai/sdk/api/train/builders.py +1076 -0
  237. synth_ai/sdk/api/train/cli.py +2196 -0
  238. synth_ai/sdk/api/train/config_finder.py +267 -0
  239. synth_ai/sdk/api/train/configs/__init__.py +67 -0
  240. synth_ai/sdk/api/train/configs/prompt_learning.py +1800 -0
  241. synth_ai/sdk/api/train/configs/rl.py +436 -0
  242. synth_ai/sdk/api/train/configs/sft.py +263 -0
  243. synth_ai/sdk/api/train/configs/shared.py +81 -0
  244. synth_ai/sdk/api/train/context_learning.py +312 -0
  245. synth_ai/sdk/api/train/env_resolver.py +418 -0
  246. synth_ai/sdk/api/train/graph_validators.py +216 -0
  247. synth_ai/sdk/api/train/graphgen.py +1102 -0
  248. synth_ai/sdk/api/train/graphgen_models.py +873 -0
  249. synth_ai/sdk/api/train/graphgen_validators.py +109 -0
  250. synth_ai/sdk/api/train/local_api.py +10 -0
  251. synth_ai/sdk/api/train/pollers.py +160 -0
  252. synth_ai/sdk/api/train/progress/__init__.py +97 -0
  253. synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
  254. synth_ai/sdk/api/train/progress/events.py +326 -0
  255. synth_ai/sdk/api/train/progress/results.py +428 -0
  256. synth_ai/sdk/api/train/progress/tracker.py +641 -0
  257. synth_ai/sdk/api/train/prompt_learning.py +800 -0
  258. synth_ai/sdk/api/train/rl.py +478 -0
  259. synth_ai/sdk/api/train/sft.py +398 -0
  260. synth_ai/sdk/api/train/summary.py +522 -0
  261. synth_ai/sdk/api/train/supported_algos.py +147 -0
  262. synth_ai/sdk/api/train/task_app.py +351 -0
  263. synth_ai/sdk/api/train/utils.py +279 -0
  264. synth_ai/sdk/api/train/validators.py +2424 -0
  265. synth_ai/sdk/graphs/__init__.py +15 -0
  266. synth_ai/sdk/graphs/completions.py +776 -0
  267. synth_ai/sdk/graphs/verifier_schemas.py +222 -0
  268. synth_ai/sdk/inference/__init__.py +6 -0
  269. synth_ai/sdk/inference/client.py +128 -0
  270. synth_ai/sdk/jobs/__init__.py +16 -0
  271. synth_ai/sdk/jobs/client.py +371 -0
  272. synth_ai/sdk/learning/__init__.py +99 -0
  273. synth_ai/sdk/learning/client.py +240 -0
  274. synth_ai/sdk/learning/context_learning_client.py +531 -0
  275. synth_ai/sdk/learning/context_learning_types.py +294 -0
  276. synth_ai/sdk/learning/ft_client.py +7 -0
  277. synth_ai/sdk/learning/health.py +49 -0
  278. synth_ai/sdk/learning/jobs.py +202 -0
  279. synth_ai/sdk/learning/prompt_extraction.py +334 -0
  280. synth_ai/sdk/learning/prompt_learning_client.py +455 -0
  281. synth_ai/sdk/learning/prompt_learning_types.py +186 -0
  282. synth_ai/sdk/learning/rl/__init__.py +35 -0
  283. synth_ai/sdk/learning/rl/client.py +268 -0
  284. synth_ai/sdk/learning/rl/contracts.py +23 -0
  285. synth_ai/sdk/learning/rl/env_keys.py +166 -0
  286. synth_ai/sdk/learning/rl/secrets.py +13 -0
  287. synth_ai/sdk/learning/sft/client.py +95 -0
  288. synth_ai/sdk/learning/sft/config.py +270 -0
  289. synth_ai/sdk/learning/sft/data.py +698 -0
  290. synth_ai/sdk/learning/validators.py +52 -0
  291. synth_ai/sdk/localapi/__init__.py +40 -0
  292. synth_ai/sdk/localapi/apps/__init__.py +28 -0
  293. synth_ai/sdk/localapi/client.py +10 -0
  294. synth_ai/sdk/localapi/contracts.py +10 -0
  295. synth_ai/sdk/localapi/helpers.py +519 -0
  296. synth_ai/sdk/localapi/rollouts.py +93 -0
  297. synth_ai/sdk/localapi/server.py +29 -0
  298. synth_ai/sdk/localapi/template.py +49 -0
  299. synth_ai/sdk/streaming/__init__.py +35 -0
  300. synth_ai/sdk/streaming/config.py +94 -0
  301. synth_ai/sdk/streaming/handlers.py +1997 -0
  302. synth_ai/sdk/streaming/streamer.py +708 -0
  303. synth_ai/sdk/streaming/types.py +112 -0
  304. synth_ai/sdk/task/__init__.py +164 -0
  305. synth_ai/sdk/task/apps/__init__.py +169 -0
  306. synth_ai/sdk/task/client.py +175 -0
  307. synth_ai/sdk/task/config.py +256 -0
  308. synth_ai/sdk/task/contracts.py +340 -0
  309. synth_ai/sdk/task/datasets.py +108 -0
  310. synth_ai/sdk/task/in_process.py +1200 -0
  311. synth_ai/sdk/task/in_process_runner.py +314 -0
  312. synth_ai/sdk/task/inference_api.py +299 -0
  313. synth_ai/sdk/task/proxy.py +287 -0
  314. synth_ai/sdk/task/rubrics/__init__.py +54 -0
  315. synth_ai/sdk/task/rubrics/loaders.py +156 -0
  316. synth_ai/sdk/task/rubrics/strict.py +148 -0
  317. synth_ai/sdk/task/rubrics.py +219 -0
  318. synth_ai/sdk/task/server.py +640 -0
  319. synth_ai/sdk/task/trace_correlation_helpers.py +557 -0
  320. synth_ai/sdk/task/tracing_utils.py +95 -0
  321. synth_ai/sdk/task/validators.py +441 -0
  322. synth_ai/sdk/training/__init__.py +93 -0
  323. synth_ai/sdk/tunnels/__init__.py +118 -0
  324. synth_ai/sdk/tunnels/cleanup.py +83 -0
  325. synth_ai/sdk/tunnels/ports.py +120 -0
  326. synth_ai/sdk/tunnels/tunneled_api.py +363 -0
  327. synth_ai/utils/__init__.py +213 -0
  328. synth_ai-0.4.4.dist-info/METADATA +262 -0
  329. synth_ai-0.4.4.dist-info/RECORD +369 -0
  330. synth_ai-0.4.4.dist-info/top_level.txt +1 -0
  331. examples/__init__.py +0 -16
  332. examples/analyze_semantic_words.sh +0 -17
  333. examples/crafter_debug_render.py +0 -186
  334. examples/dev/qwen3_32b_qlora_4xh100.toml +0 -40
  335. examples/multi_step/configs/README_verilog_rl.md +0 -77
  336. examples/multi_step/configs/VERILOG_REWARDS.md +0 -90
  337. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +0 -183
  338. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +0 -35
  339. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +0 -36
  340. examples/multi_step/configs/crafter_rl_outcome.toml +0 -74
  341. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +0 -187
  342. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +0 -83
  343. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +0 -78
  344. examples/multi_step/configs/crafter_synth_backend.md +0 -40
  345. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +0 -31
  346. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +0 -33
  347. examples/multi_step/configs/verilog_rl_lora.toml +0 -190
  348. examples/multi_step/crafter_rl_lora.md +0 -70
  349. examples/multi_step/judges/crafter_backend_judge.py +0 -220
  350. examples/multi_step/judges/verilog_backend_judge.py +0 -234
  351. examples/multi_step/readme.md +0 -48
  352. examples/multi_step/sse_metrics_streaming_notes.md +0 -357
  353. examples/multi_step/task_app_config_notes.md +0 -494
  354. examples/multi_step/verilog_rl_lora.md +0 -218
  355. examples/qwen_coder/README.md +0 -102
  356. examples/qwen_coder/_shared.py +0 -113
  357. examples/qwen_coder/configs/coder_lora_30b.toml +0 -61
  358. examples/qwen_coder/configs/coder_lora_4b.toml +0 -57
  359. examples/qwen_coder/configs/coder_lora_small.toml +0 -58
  360. examples/qwen_coder/generate_dataset.py +0 -98
  361. examples/qwen_coder/infer_ft_smoke.py +0 -65
  362. examples/qwen_coder/infer_prod_proxy.py +0 -73
  363. examples/qwen_coder/infer_via_synth.py +0 -87
  364. examples/qwen_coder/scripts/infer_coder.sh +0 -19
  365. examples/qwen_coder/scripts/train_coder_30b.sh +0 -22
  366. examples/qwen_coder/sft_full_17b.py +0 -103
  367. examples/qwen_coder/sft_lora_30b.py +0 -110
  368. examples/qwen_coder/subset_jsonl.py +0 -39
  369. examples/qwen_coder/todos.md +0 -38
  370. examples/qwen_coder/validate_jsonl.py +0 -60
  371. examples/rl/README.md +0 -169
  372. examples/rl/download_dataset.py +0 -80
  373. examples/run_crafter_demo.sh +0 -10
  374. examples/sft/README.md +0 -139
  375. examples/sft/configs/crafter_fft_qwen0p6b.toml +0 -44
  376. examples/sft/configs/crafter_lora_qwen0p6b.toml +0 -45
  377. examples/sft/evaluate.py +0 -119
  378. examples/sft/export_dataset.py +0 -117
  379. examples/sft/generate_traces.py +0 -164
  380. examples/swe/__init__.py +0 -12
  381. examples/swe/task_app/README.md +0 -105
  382. examples/swe/task_app/__init__.py +0 -2
  383. examples/swe/task_app/grpo_swe_mini.py +0 -601
  384. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -136
  385. examples/swe/task_app/hosted/README.md +0 -173
  386. examples/swe/task_app/hosted/__init__.py +0 -5
  387. examples/swe/task_app/hosted/branching.py +0 -143
  388. examples/swe/task_app/hosted/environment_routes.py +0 -1289
  389. examples/swe/task_app/hosted/envs/__init__.py +0 -1
  390. examples/swe/task_app/hosted/envs/crafter/__init__.py +0 -6
  391. examples/swe/task_app/hosted/envs/crafter/app.py +0 -1
  392. examples/swe/task_app/hosted/envs/crafter/environment.py +0 -522
  393. examples/swe/task_app/hosted/envs/crafter/policy.py +0 -478
  394. examples/swe/task_app/hosted/envs/crafter/react_agent.py +0 -108
  395. examples/swe/task_app/hosted/envs/crafter/shared.py +0 -305
  396. examples/swe/task_app/hosted/envs/crafter/tools.py +0 -47
  397. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +0 -8
  398. examples/swe/task_app/hosted/envs/mini_swe/environment.py +0 -1164
  399. examples/swe/task_app/hosted/envs/mini_swe/policy.py +0 -355
  400. examples/swe/task_app/hosted/envs/mini_swe/shared.py +0 -83
  401. examples/swe/task_app/hosted/envs/mini_swe/tools.py +0 -96
  402. examples/swe/task_app/hosted/hosted_app.py +0 -204
  403. examples/swe/task_app/hosted/inference/__init__.py +0 -5
  404. examples/swe/task_app/hosted/inference/openai_client.py +0 -618
  405. examples/swe/task_app/hosted/main.py +0 -100
  406. examples/swe/task_app/hosted/policy_routes.py +0 -1079
  407. examples/swe/task_app/hosted/registry.py +0 -195
  408. examples/swe/task_app/hosted/rollout.py +0 -1911
  409. examples/swe/task_app/hosted/storage/__init__.py +0 -5
  410. examples/swe/task_app/hosted/storage/volume.py +0 -211
  411. examples/swe/task_app/hosted/test_agents.py +0 -161
  412. examples/swe/task_app/hosted/test_service.py +0 -136
  413. examples/swe/task_app/hosted/utils.py +0 -62
  414. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +0 -258
  415. examples/task_apps/TESTING.md +0 -275
  416. examples/task_apps/crafter/CREATE_SFT_DATASET.md +0 -273
  417. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +0 -152
  418. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +0 -174
  419. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +0 -268
  420. examples/task_apps/crafter/QUERY_EXAMPLES.md +0 -203
  421. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +0 -316
  422. examples/task_apps/crafter/__init__.py +0 -0
  423. examples/task_apps/crafter/eval_image_only_gpt4o.toml +0 -28
  424. examples/task_apps/crafter/eval_text_only_groq_llama.toml +0 -36
  425. examples/task_apps/crafter/filter_sft_dataset.toml +0 -16
  426. examples/task_apps/crafter/task_app/README.md +0 -42
  427. examples/task_apps/crafter/task_app/__init__.py +0 -5
  428. examples/task_apps/crafter/task_app/grpo_crafter.py +0 -973
  429. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +0 -146
  430. examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +0 -173
  431. examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +0 -5
  432. examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +0 -143
  433. examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +0 -1226
  434. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +0 -1
  435. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -6
  436. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +0 -1
  437. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -532
  438. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +0 -547
  439. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -123
  440. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -305
  441. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -47
  442. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +0 -204
  443. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +0 -5
  444. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +0 -704
  445. examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +0 -100
  446. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +0 -1152
  447. examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +0 -195
  448. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +0 -2160
  449. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +0 -5
  450. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +0 -211
  451. examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +0 -161
  452. examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +0 -136
  453. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +0 -218
  454. examples/task_apps/dev/pokemon_emerald/__init__.py +0 -2
  455. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +0 -811
  456. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +0 -120
  457. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +0 -160
  458. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +0 -155
  459. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +0 -69
  460. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +0 -96
  461. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +0 -1502
  462. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +0 -4
  463. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +0 -68
  464. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +0 -216
  465. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +0 -35
  466. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +0 -631
  467. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +0 -1544
  468. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +0 -1428
  469. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +0 -4848
  470. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +0 -41
  471. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +0 -298
  472. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +0 -95
  473. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +0 -204
  474. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  475. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +0 -2152
  476. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +0 -429
  477. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +0 -155
  478. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +0 -78
  479. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  480. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +0 -122
  481. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +0 -76
  482. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +0 -413
  483. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +0 -204
  484. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +0 -133
  485. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +0 -229
  486. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +0 -300
  487. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +0 -205
  488. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +0 -200
  489. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +0 -284
  490. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +0 -468
  491. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +0 -575
  492. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +0 -311
  493. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +0 -259
  494. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  495. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +0 -372
  496. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +0 -296
  497. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +0 -275
  498. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +0 -22
  499. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +0 -44
  500. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +0 -514
  501. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +0 -415
  502. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +0 -1763
  503. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +0 -33
  504. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +0 -106
  505. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +0 -334
  506. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +0 -1020
  507. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +0 -188
  508. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +0 -1481
  509. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +0 -862
  510. examples/task_apps/dev/pokemon_emerald/modal_app.py +0 -114
  511. examples/task_apps/dev/pokemon_emerald/task_app/README.md +0 -81
  512. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +0 -6
  513. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +0 -685
  514. examples/task_apps/enron/__init__.py +0 -1
  515. examples/task_apps/enron/eval_groq_qwen32.toml +0 -16
  516. examples/task_apps/enron/filter_sft.toml +0 -5
  517. examples/task_apps/enron/task_app/README.md +0 -14
  518. examples/task_apps/enron/task_app/__init__.py +0 -1
  519. examples/task_apps/enron/task_app/grpo_enron.py +0 -906
  520. examples/task_apps/enron/task_app/grpo_enron_task_app.py +0 -146
  521. examples/task_apps/enron/tests/__init__.py +0 -4
  522. examples/task_apps/enron/tests/conftest.py +0 -115
  523. examples/task_apps/enron/tests/integration/__init__.py +0 -4
  524. examples/task_apps/enron/tests/integration/test_enron_eval.py +0 -179
  525. examples/task_apps/enron/tests/integration/test_enron_rollout.py +0 -135
  526. examples/task_apps/enron/tests/unit/__init__.py +0 -4
  527. examples/task_apps/enron/tests/unit/test_enron_environment.py +0 -126
  528. examples/task_apps/math/README.md +0 -22
  529. examples/task_apps/math/__init__.py +0 -0
  530. examples/task_apps/math/math_single_step.py +0 -1000
  531. examples/task_apps/math/math_task_app.py +0 -115
  532. examples/task_apps/pokemon_battle/__init__.py +0 -2
  533. examples/task_apps/pokemon_battle/modal_app.py +0 -104
  534. examples/task_apps/pokemon_battle/task_app/README.md +0 -68
  535. examples/task_apps/pokemon_battle/task_app/__init__.py +0 -6
  536. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +0 -932
  537. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +0 -283
  538. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +0 -155
  539. examples/task_apps/pokemon_red/README.md +0 -357
  540. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +0 -415
  541. examples/task_apps/pokemon_red/__init__.py +0 -3
  542. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +0 -29
  543. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +0 -225
  544. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +0 -75
  545. examples/task_apps/pokemon_red/task_app.py +0 -799
  546. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +0 -193
  547. examples/task_apps/sokoban/README.md +0 -307
  548. examples/task_apps/sokoban/__init__.py +0 -3
  549. examples/task_apps/sokoban/eval_groq_qwen32.toml +0 -16
  550. examples/task_apps/sokoban/eval_openai_gpt5.toml +0 -16
  551. examples/task_apps/sokoban/filter_sft.toml +0 -5
  552. examples/task_apps/sokoban/task_app.py +0 -1058
  553. examples/task_apps/sokoban/tests/__init__.py +0 -4
  554. examples/task_apps/sokoban/tests/conftest.py +0 -113
  555. examples/task_apps/sokoban/tests/integration/__init__.py +0 -4
  556. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +0 -57
  557. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +0 -198
  558. examples/task_apps/sokoban/tests/unit/__init__.py +0 -4
  559. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +0 -114
  560. examples/task_apps/verilog/__init__.py +0 -1
  561. examples/task_apps/verilog/eval_groq_qwen32b.toml +0 -24
  562. examples/task_apps/verilog/filter_sft.toml +0 -5
  563. examples/task_apps/verilog/task_app/README.md +0 -12
  564. examples/task_apps/verilog/task_app/__init__.py +0 -1
  565. examples/task_apps/verilog/task_app/grpo_verilog.py +0 -1166
  566. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +0 -145
  567. examples/task_apps/verilog/tests/__init__.py +0 -4
  568. examples/task_apps/verilog/tests/conftest.py +0 -115
  569. examples/task_apps/verilog/tests/integration/__init__.py +0 -4
  570. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +0 -181
  571. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +0 -55
  572. examples/task_apps/verilog/tests/unit/__init__.py +0 -4
  573. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +0 -118
  574. examples/vlm/PROPOSAL.md +0 -53
  575. examples/vlm/README.md +0 -68
  576. examples/vlm/configs/crafter_vlm_gpt4o.toml +0 -44
  577. examples/vlm/crafter_image_only_agent.py +0 -207
  578. examples/vlm/crafter_openai_vlm_agent.py +0 -277
  579. examples/vlm/filter_image_rows.py +0 -63
  580. examples/vlm/run_crafter_vlm_benchmark.py +0 -316
  581. examples/warming_up_to_rl/analyze_trace_db.py +0 -422
  582. examples/warming_up_to_rl/configs/crafter_fft.toml +0 -48
  583. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -54
  584. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +0 -20
  585. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +0 -13
  586. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +0 -23
  587. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +0 -35
  588. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +0 -26
  589. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +0 -36
  590. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +0 -32
  591. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +0 -83
  592. examples/warming_up_to_rl/configs/rl_from_ft.toml +0 -56
  593. examples/warming_up_to_rl/export_trace_sft.py +0 -723
  594. examples/warming_up_to_rl/groq_test.py +0 -97
  595. examples/warming_up_to_rl/manage_secrets.py +0 -131
  596. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  597. examples/warming_up_to_rl/old/notes.md +0 -73
  598. examples/warming_up_to_rl/readme.md +0 -179
  599. examples/warming_up_to_rl/run_eval.py +0 -736
  600. examples/warming_up_to_rl/run_fft_and_save.py +0 -380
  601. examples/warming_up_to_rl/run_local_rollout.py +0 -239
  602. examples/warming_up_to_rl/run_local_rollout_modal.py +0 -248
  603. examples/warming_up_to_rl/run_local_rollout_parallel.py +0 -405
  604. examples/warming_up_to_rl/run_local_rollout_traced.py +0 -477
  605. examples/warming_up_to_rl/run_rl_and_save.py +0 -124
  606. examples/warming_up_to_rl/run_rollout_remote.py +0 -156
  607. examples/workflows/__init__.py +0 -0
  608. examples/workflows/math_rl/__init__.py +0 -0
  609. examples/workflows/math_rl/configs/eval_base_qwen.toml +0 -15
  610. examples/workflows/math_rl/configs/eval_rl_qwen.toml +0 -11
  611. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +0 -35
  612. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +0 -74
  613. examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +0 -35
  614. examples/workflows/math_rl/download_dataset.py +0 -80
  615. examples/workflows/math_rl/run_eval.py +0 -436
  616. examples/workflows/math_rl/run_rl_and_save.py +0 -111
  617. synth_ai/api/models/supported.py +0 -377
  618. synth_ai/api/train/__init__.py +0 -5
  619. synth_ai/api/train/builders.py +0 -351
  620. synth_ai/api/train/cli.py +0 -635
  621. synth_ai/api/train/config_finder.py +0 -228
  622. synth_ai/api/train/configs/__init__.py +0 -44
  623. synth_ai/api/train/configs/rl.py +0 -134
  624. synth_ai/api/train/configs/sft.py +0 -95
  625. synth_ai/api/train/configs/shared.py +0 -24
  626. synth_ai/api/train/env_resolver.py +0 -349
  627. synth_ai/api/train/pollers.py +0 -75
  628. synth_ai/api/train/supported_algos.py +0 -147
  629. synth_ai/api/train/task_app.py +0 -195
  630. synth_ai/api/train/utils.py +0 -225
  631. synth_ai/cli/_modal_wrapper.py +0 -29
  632. synth_ai/cli/_storage.py +0 -20
  633. synth_ai/cli/_typer_patch.py +0 -49
  634. synth_ai/cli/_validate_task_app.py +0 -11
  635. synth_ai/cli/balance.py +0 -216
  636. synth_ai/cli/calc.py +0 -84
  637. synth_ai/cli/demo.py +0 -165
  638. synth_ai/cli/legacy_root_backup.py +0 -468
  639. synth_ai/cli/man.py +0 -106
  640. synth_ai/cli/recent.py +0 -132
  641. synth_ai/cli/rl_demo.py +0 -254
  642. synth_ai/cli/status.py +0 -134
  643. synth_ai/cli/task_apps.py +0 -4523
  644. synth_ai/cli/traces.py +0 -164
  645. synth_ai/cli/tui.py +0 -57
  646. synth_ai/cli/watch.py +0 -506
  647. synth_ai/compound/cais.py +0 -0
  648. synth_ai/config/base_url.py +0 -107
  649. synth_ai/core/experiment.py +0 -13
  650. synth_ai/core/system.py +0 -15
  651. synth_ai/demo_registry.py +0 -295
  652. synth_ai/demos/core/__init__.py +0 -1
  653. synth_ai/demos/core/cli.py +0 -1718
  654. synth_ai/demos/demo_task_apps/core.py +0 -440
  655. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +0 -184
  656. synth_ai/demos/demo_task_apps/math/config.toml +0 -74
  657. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
  658. synth_ai/demos/demo_task_apps/math/modal_task_app.py +0 -739
  659. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -37
  660. synth_ai/environments/__init__.py +0 -31
  661. synth_ai/environments/environment/__init__.py +0 -1
  662. synth_ai/environments/environment/artifacts/__init__.py +0 -1
  663. synth_ai/environments/environment/artifacts/base.py +0 -52
  664. synth_ai/environments/environment/core.py +0 -67
  665. synth_ai/environments/environment/db/__init__.py +0 -1
  666. synth_ai/environments/environment/db/sqlite.py +0 -45
  667. synth_ai/environments/environment/registry.py +0 -233
  668. synth_ai/environments/environment/resources/sqlite.py +0 -45
  669. synth_ai/environments/environment/results.py +0 -1
  670. synth_ai/environments/environment/rewards/__init__.py +0 -1
  671. synth_ai/environments/environment/rewards/core.py +0 -29
  672. synth_ai/environments/environment/shared_engine.py +0 -26
  673. synth_ai/environments/environment/tools/__init__.py +0 -200
  674. synth_ai/environments/examples/__init__.py +0 -1
  675. synth_ai/environments/examples/bandit/__init__.py +0 -33
  676. synth_ai/environments/examples/bandit/engine.py +0 -302
  677. synth_ai/environments/examples/bandit/environment.py +0 -194
  678. synth_ai/environments/examples/bandit/taskset.py +0 -200
  679. synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
  680. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
  681. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
  682. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
  683. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
  684. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
  685. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
  686. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
  687. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  688. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
  689. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
  690. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
  691. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
  692. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
  693. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
  694. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
  695. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
  696. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
  697. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
  698. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
  699. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
  700. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  701. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
  702. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
  703. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
  704. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
  705. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
  706. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
  707. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
  708. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
  709. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
  710. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
  711. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
  712. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
  713. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
  714. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
  715. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
  716. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
  717. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
  718. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
  719. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
  720. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
  721. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
  722. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
  723. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
  724. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
  725. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
  726. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
  727. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
  728. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
  729. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
  730. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
  731. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
  732. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
  733. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
  734. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
  735. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
  736. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
  737. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
  738. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
  739. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
  740. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
  741. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
  742. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
  743. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
  744. synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
  745. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  746. synth_ai/environments/examples/crafter_classic/engine.py +0 -579
  747. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
  748. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
  749. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
  750. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
  751. synth_ai/environments/examples/crafter_classic/environment.py +0 -495
  752. synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
  753. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
  754. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
  755. synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
  756. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
  757. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
  758. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
  759. synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
  760. synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
  761. synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
  762. synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
  763. synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
  764. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
  765. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
  766. synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
  767. synth_ai/environments/examples/crafter_custom/environment.py +0 -312
  768. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
  769. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
  770. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
  771. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
  772. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
  773. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
  774. synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
  775. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
  776. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
  777. synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
  778. synth_ai/environments/examples/enron/engine.py +0 -300
  779. synth_ai/environments/examples/enron/environment.py +0 -234
  780. synth_ai/environments/examples/enron/taskset.py +0 -112
  781. synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
  782. synth_ai/environments/examples/minigrid/__init__.py +0 -48
  783. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
  784. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
  785. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
  786. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
  787. synth_ai/environments/examples/minigrid/engine.py +0 -589
  788. synth_ai/environments/examples/minigrid/environment.py +0 -274
  789. synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
  790. synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
  791. synth_ai/environments/examples/minigrid/taskset.py +0 -583
  792. synth_ai/environments/examples/nethack/__init__.py +0 -7
  793. synth_ai/environments/examples/nethack/achievements.py +0 -337
  794. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
  795. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
  796. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
  797. synth_ai/environments/examples/nethack/engine.py +0 -739
  798. synth_ai/environments/examples/nethack/environment.py +0 -256
  799. synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
  800. synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
  801. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
  802. synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
  803. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
  804. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
  805. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
  806. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
  807. synth_ai/environments/examples/nethack/taskset.py +0 -323
  808. synth_ai/environments/examples/red/__init__.py +0 -7
  809. synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
  810. synth_ai/environments/examples/red/config_logging.py +0 -110
  811. synth_ai/environments/examples/red/engine.py +0 -721
  812. synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
  813. synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -35
  814. synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
  815. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
  816. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
  817. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
  818. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
  819. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
  820. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
  821. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
  822. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
  823. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +0 -477
  824. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
  825. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
  826. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
  827. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
  828. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
  829. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -172
  830. synth_ai/environments/examples/red/environment.py +0 -298
  831. synth_ai/environments/examples/red/taskset.py +0 -79
  832. synth_ai/environments/examples/red/units/__init__.py +0 -1
  833. synth_ai/environments/examples/sokoban/__init__.py +0 -1
  834. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
  835. synth_ai/environments/examples/sokoban/engine.py +0 -678
  836. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
  837. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
  838. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
  839. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
  840. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
  841. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
  842. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
  843. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
  844. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
  845. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
  846. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
  847. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
  848. synth_ai/environments/examples/sokoban/environment.py +0 -229
  849. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
  850. synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
  851. synth_ai/environments/examples/sokoban/taskset.py +0 -544
  852. synth_ai/environments/examples/tictactoe/__init__.py +0 -1
  853. synth_ai/environments/examples/tictactoe/engine.py +0 -368
  854. synth_ai/environments/examples/tictactoe/environment.py +0 -240
  855. synth_ai/environments/examples/tictactoe/taskset.py +0 -215
  856. synth_ai/environments/examples/verilog/__init__.py +0 -10
  857. synth_ai/environments/examples/verilog/engine.py +0 -421
  858. synth_ai/environments/examples/verilog/environment.py +0 -350
  859. synth_ai/environments/examples/verilog/taskset.py +0 -420
  860. synth_ai/environments/examples/wordle/__init__.py +0 -29
  861. synth_ai/environments/examples/wordle/engine.py +0 -398
  862. synth_ai/environments/examples/wordle/environment.py +0 -159
  863. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
  864. synth_ai/environments/examples/wordle/taskset.py +0 -230
  865. synth_ai/environments/reproducibility/core.py +0 -42
  866. synth_ai/environments/reproducibility/helpers.py +0 -0
  867. synth_ai/environments/reproducibility/tree.py +0 -363
  868. synth_ai/environments/service/app.py +0 -97
  869. synth_ai/environments/service/core_routes.py +0 -1021
  870. synth_ai/environments/service/external_registry.py +0 -56
  871. synth_ai/environments/service/registry.py +0 -9
  872. synth_ai/environments/stateful/__init__.py +0 -1
  873. synth_ai/environments/stateful/core.py +0 -163
  874. synth_ai/environments/stateful/engine.py +0 -21
  875. synth_ai/environments/stateful/state.py +0 -7
  876. synth_ai/environments/tasks/api.py +0 -19
  877. synth_ai/environments/tasks/core.py +0 -81
  878. synth_ai/environments/tasks/filters.py +0 -40
  879. synth_ai/environments/tasks/utils.py +0 -90
  880. synth_ai/environments/v0_observability/history.py +0 -3
  881. synth_ai/environments/v0_observability/log.py +0 -2
  882. synth_ai/evals/__init__.py +0 -15
  883. synth_ai/evals/base.py +0 -13
  884. synth_ai/evals/client.py +0 -82
  885. synth_ai/evals/types.py +0 -42
  886. synth_ai/handshake.py +0 -109
  887. synth_ai/http.py +0 -26
  888. synth_ai/http_client.py +0 -136
  889. synth_ai/inference/__init__.py +0 -5
  890. synth_ai/inference/client.py +0 -34
  891. synth_ai/jobs/client.py +0 -295
  892. synth_ai/judge_schemas.py +0 -127
  893. synth_ai/learning/__init__.py +0 -59
  894. synth_ai/learning/client.py +0 -241
  895. synth_ai/learning/ft_client.py +0 -7
  896. synth_ai/learning/health.py +0 -49
  897. synth_ai/learning/jobs.py +0 -201
  898. synth_ai/learning/rl/__init__.py +0 -39
  899. synth_ai/learning/rl/client.py +0 -267
  900. synth_ai/learning/rl/contracts.py +0 -27
  901. synth_ai/learning/rl/env_keys.py +0 -166
  902. synth_ai/learning/rl/secrets.py +0 -13
  903. synth_ai/learning/sft/client.py +0 -68
  904. synth_ai/learning/sft/config.py +0 -270
  905. synth_ai/learning/sft/data.py +0 -295
  906. synth_ai/learning/validators.py +0 -49
  907. synth_ai/lm/__init__.py +0 -25
  908. synth_ai/task/__init__.py +0 -121
  909. synth_ai/task/apps/__init__.py +0 -129
  910. synth_ai/task/client.py +0 -167
  911. synth_ai/task/config.py +0 -257
  912. synth_ai/task/contracts.py +0 -236
  913. synth_ai/task/datasets.py +0 -108
  914. synth_ai/task/proxy.py +0 -251
  915. synth_ai/task/rubrics/__init__.py +0 -56
  916. synth_ai/task/rubrics/loaders.py +0 -152
  917. synth_ai/task/rubrics/strict.py +0 -149
  918. synth_ai/task/server.py +0 -432
  919. synth_ai/task/trace_correlation_helpers.py +0 -315
  920. synth_ai/task/tracing_utils.py +0 -84
  921. synth_ai/task/validators.py +0 -418
  922. synth_ai/tracing_v3/__init__.py +0 -97
  923. synth_ai/tracing_v3/abstractions.py +0 -302
  924. synth_ai/tracing_v3/config.py +0 -84
  925. synth_ai/tracing_v3/db_config.py +0 -194
  926. synth_ai/tracing_v3/decorators.py +0 -398
  927. synth_ai/tracing_v3/llm_call_record_helpers.py +0 -391
  928. synth_ai/tracing_v3/migration_helper.py +0 -120
  929. synth_ai/tracing_v3/session_tracer.py +0 -540
  930. synth_ai/tracing_v3/storage/base.py +0 -210
  931. synth_ai/tracing_v3/storage/config.py +0 -75
  932. synth_ai/tracing_v3/storage/factory.py +0 -39
  933. synth_ai/tracing_v3/trace_utils.py +0 -317
  934. synth_ai/tracing_v3/turso/daemon.py +0 -151
  935. synth_ai/tracing_v3/turso/models.py +0 -469
  936. synth_ai/tracing_v3/turso/native_manager.py +0 -1209
  937. synth_ai/tracing_v3/utils.py +0 -108
  938. synth_ai/tui/__init__.py +0 -5
  939. synth_ai/tui/__main__.py +0 -13
  940. synth_ai/tui/cli/__init__.py +0 -1
  941. synth_ai/tui/cli/query_experiments.py +0 -164
  942. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  943. synth_ai/tui/dashboard.py +0 -906
  944. synth_ai/v0/api/__init__.py +0 -8
  945. synth_ai/v0/api/models/__init__.py +0 -8
  946. synth_ai/v0/api/models/supported.py +0 -8
  947. synth_ai/v0/config/__init__.py +0 -15
  948. synth_ai/v0/config/base_url.py +0 -12
  949. synth_ai/v0/lm/__init__.py +0 -51
  950. synth_ai/v0/lm/caching/__init__.py +0 -0
  951. synth_ai/v0/lm/caching/constants.py +0 -6
  952. synth_ai/v0/lm/caching/dbs.py +0 -0
  953. synth_ai/v0/lm/caching/ephemeral.py +0 -100
  954. synth_ai/v0/lm/caching/handler.py +0 -137
  955. synth_ai/v0/lm/caching/initialize.py +0 -11
  956. synth_ai/v0/lm/caching/persistent.py +0 -114
  957. synth_ai/v0/lm/config.py +0 -115
  958. synth_ai/v0/lm/constants.py +0 -32
  959. synth_ai/v0/lm/core/__init__.py +0 -8
  960. synth_ai/v0/lm/core/all.py +0 -73
  961. synth_ai/v0/lm/core/exceptions.py +0 -5
  962. synth_ai/v0/lm/core/main.py +0 -331
  963. synth_ai/v0/lm/core/main_v3.py +0 -594
  964. synth_ai/v0/lm/core/synth_models.py +0 -35
  965. synth_ai/v0/lm/core/vendor_clients.py +0 -190
  966. synth_ai/v0/lm/cost/__init__.py +0 -0
  967. synth_ai/v0/lm/cost/monitor.py +0 -1
  968. synth_ai/v0/lm/cost/statefulness.py +0 -1
  969. synth_ai/v0/lm/injection.py +0 -80
  970. synth_ai/v0/lm/overrides.py +0 -206
  971. synth_ai/v0/lm/provider_support/__init__.py +0 -8
  972. synth_ai/v0/lm/provider_support/anthropic.py +0 -972
  973. synth_ai/v0/lm/provider_support/openai.py +0 -1139
  974. synth_ai/v0/lm/provider_support/suppress_logging.py +0 -31
  975. synth_ai/v0/lm/structured_outputs/__init__.py +0 -0
  976. synth_ai/v0/lm/structured_outputs/handler.py +0 -440
  977. synth_ai/v0/lm/structured_outputs/inject.py +0 -297
  978. synth_ai/v0/lm/structured_outputs/rehabilitate.py +0 -185
  979. synth_ai/v0/lm/tools/__init__.py +0 -3
  980. synth_ai/v0/lm/tools/base.py +0 -172
  981. synth_ai/v0/lm/unified_interface.py +0 -202
  982. synth_ai/v0/lm/vendors/__init__.py +0 -0
  983. synth_ai/v0/lm/vendors/base.py +0 -81
  984. synth_ai/v0/lm/vendors/core/__init__.py +0 -0
  985. synth_ai/v0/lm/vendors/core/anthropic_api.py +0 -387
  986. synth_ai/v0/lm/vendors/core/gemini_api.py +0 -292
  987. synth_ai/v0/lm/vendors/core/mistral_api.py +0 -322
  988. synth_ai/v0/lm/vendors/core/openai_api.py +0 -227
  989. synth_ai/v0/lm/vendors/core/synth_dev_api.py +0 -0
  990. synth_ai/v0/lm/vendors/local/__init__.py +0 -0
  991. synth_ai/v0/lm/vendors/local/ollama.py +0 -0
  992. synth_ai/v0/lm/vendors/openai_standard.py +0 -782
  993. synth_ai/v0/lm/vendors/openai_standard_responses.py +0 -259
  994. synth_ai/v0/lm/vendors/retries.py +0 -22
  995. synth_ai/v0/lm/vendors/supported/__init__.py +0 -0
  996. synth_ai/v0/lm/vendors/supported/custom_endpoint.py +0 -415
  997. synth_ai/v0/lm/vendors/supported/deepseek.py +0 -69
  998. synth_ai/v0/lm/vendors/supported/grok.py +0 -75
  999. synth_ai/v0/lm/vendors/supported/groq.py +0 -16
  1000. synth_ai/v0/lm/vendors/supported/ollama.py +0 -15
  1001. synth_ai/v0/lm/vendors/supported/openrouter.py +0 -74
  1002. synth_ai/v0/lm/vendors/supported/together.py +0 -11
  1003. synth_ai/v0/lm/vendors/synth_client.py +0 -835
  1004. synth_ai/v0/lm/warmup.py +0 -186
  1005. synth_ai/v0/tracing/__init__.py +0 -0
  1006. synth_ai/v0/tracing/abstractions.py +0 -224
  1007. synth_ai/v0/tracing/base_client.py +0 -91
  1008. synth_ai/v0/tracing/client_manager.py +0 -131
  1009. synth_ai/v0/tracing/config.py +0 -142
  1010. synth_ai/v0/tracing/context.py +0 -146
  1011. synth_ai/v0/tracing/decorators.py +0 -682
  1012. synth_ai/v0/tracing/events/__init__.py +0 -0
  1013. synth_ai/v0/tracing/events/manage.py +0 -147
  1014. synth_ai/v0/tracing/events/scope.py +0 -86
  1015. synth_ai/v0/tracing/events/store.py +0 -228
  1016. synth_ai/v0/tracing/immediate_client.py +0 -151
  1017. synth_ai/v0/tracing/local.py +0 -18
  1018. synth_ai/v0/tracing/log_client_base.py +0 -73
  1019. synth_ai/v0/tracing/retry_queue.py +0 -186
  1020. synth_ai/v0/tracing/trackers.py +0 -515
  1021. synth_ai/v0/tracing/upload.py +0 -409
  1022. synth_ai/v0/tracing/utils.py +0 -9
  1023. synth_ai/v0/tracing_v1/__init__.py +0 -16
  1024. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  1025. synth_ai/v0/tracing_v1/base_client.py +0 -91
  1026. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  1027. synth_ai/v0/tracing_v1/config.py +0 -142
  1028. synth_ai/v0/tracing_v1/context.py +0 -146
  1029. synth_ai/v0/tracing_v1/decorators.py +0 -703
  1030. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  1031. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  1032. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  1033. synth_ai/v0/tracing_v1/events/store.py +0 -228
  1034. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  1035. synth_ai/v0/tracing_v1/local.py +0 -18
  1036. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  1037. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  1038. synth_ai/v0/tracing_v1/trackers.py +0 -515
  1039. synth_ai/v0/tracing_v1/upload.py +0 -527
  1040. synth_ai/v0/tracing_v1/utils.py +0 -9
  1041. synth_ai/v0/tracing_v3/__init__.py +0 -10
  1042. synth_ai/v0/tracing_v3/abstractions.py +0 -3
  1043. synth_ai/v0/tracing_v3/decorators.py +0 -3
  1044. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +0 -3
  1045. synth_ai/v0/tracing_v3/session_tracer.py +0 -3
  1046. synth_ai-0.2.14.dist-info/METADATA +0 -139
  1047. synth_ai-0.2.14.dist-info/RECORD +0 -762
  1048. synth_ai-0.2.14.dist-info/top_level.txt +0 -2
  1049. /synth_ai/{demos/demo_task_apps → cli/demo_apps}/crafter/__init__.py +0 -0
  1050. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/__init__.py +0 -0
  1051. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/crafter_fft_4b.toml +0 -0
  1052. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +0 -0
  1053. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
  1054. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/_common.py +0 -0
  1055. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +0 -0
  1056. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +0 -0
  1057. {examples/task_apps → synth_ai/core/apps}/__init__.py +0 -0
  1058. /synth_ai/{tracing_v3 → core/tracing_v3}/examples/basic_usage.py +0 -0
  1059. /synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +0 -0
  1060. /synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
  1061. /synth_ai/{tracing_v3 → core/tracing_v3}/replica_sync.py +0 -0
  1062. /synth_ai/{tracing_v3 → core/tracing_v3}/serialization.py +0 -0
  1063. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
  1064. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
  1065. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
  1066. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/utils.py +0 -0
  1067. /synth_ai/{tracing_v3 → core/tracing_v3}/turso/__init__.py +0 -0
  1068. /synth_ai/{learning → sdk/learning}/algorithms.py +0 -0
  1069. /synth_ai/{learning → sdk/learning}/config.py +0 -0
  1070. /synth_ai/{learning → sdk/learning}/constants.py +0 -0
  1071. /synth_ai/{learning → sdk/learning}/core.py +0 -0
  1072. /synth_ai/{learning → sdk/learning}/gateway.py +0 -0
  1073. /synth_ai/{learning → sdk/learning}/rl/config.py +0 -0
  1074. /synth_ai/{learning → sdk/learning}/rl_client.py +0 -0
  1075. /synth_ai/{learning → sdk/learning}/sft/__init__.py +0 -0
  1076. /synth_ai/{learning → sdk/learning}/sse.py +0 -0
  1077. /synth_ai/{task → sdk/task}/auth.py +0 -0
  1078. /synth_ai/{task → sdk/task}/errors.py +0 -0
  1079. /synth_ai/{task → sdk/task}/health.py +0 -0
  1080. /synth_ai/{task → sdk/task}/json.py +0 -0
  1081. /synth_ai/{task → sdk/task}/rubrics/models.py +0 -0
  1082. /synth_ai/{task → sdk/task}/rubrics/scoring.py +0 -0
  1083. /synth_ai/{task → sdk/task}/vendors.py +0 -0
  1084. {synth_ai-0.2.14.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
  1085. {synth_ai-0.2.14.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
  1086. {synth_ai-0.2.14.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1984 @@
1
+ """Celery task definitions for running experiment jobs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ import os
7
+ import re
8
+ import shlex
9
+ import subprocess
10
+ import sys
11
+ import threading
12
+ import time
13
+ from datetime import UTC, datetime
14
+ from pathlib import Path
15
+ from typing import Any
16
+
17
+ from celery.utils.log import get_task_logger
18
+ from dotenv import load_dotenv
19
+
20
+ from .api_schemas import BackendEventsResponse
21
+ from .celery_app import celery_app
22
+ from .config import load_config
23
+ from .config_utils import PreparedConfig, prepare_config_file
24
+ from .database import session_scope
25
+ from .dispatcher import dispatch_available_jobs
26
+ from .models import (
27
+ Experiment,
28
+ ExperimentJob,
29
+ ExperimentJobStatus,
30
+ ExperimentStatus,
31
+ JobExecutionLog,
32
+ )
33
+ from .results import ResultSummary, collect_result_summary
34
+ from .status import ExperimentStatusTracker
35
+ from .status_tracker import extract_config_info, update_status_from_output
36
+ from .trace_storage import persist_trials_from_summary, update_experiment_metadata
37
+
38
+ logger = get_task_logger(__name__)
39
+
40
+
41
+ TRAIN_COMMAND_ENV = "EXPERIMENT_QUEUE_TRAIN_CMD"
42
+
43
+
44
+ def _load_synth_api_key() -> str:
45
+ """Load SYNTH_API_KEY from .env file and fail loudly if not found.
46
+
47
+ Never falls back to other sources - must be explicitly set in .env file.
48
+
49
+ Returns:
50
+ The API key as a string.
51
+
52
+ Raises:
53
+ RuntimeError: If SYNTH_API_KEY is not found in .env file.
54
+ """
55
+ # Find .env file - check synth-ai root first, then current directory
56
+ repo_root = Path(__file__).resolve().parents[3] # synth_ai/experiment_queue/tasks.py -> synth-ai/
57
+ env_file = repo_root / ".env"
58
+
59
+ if not env_file.exists():
60
+ # Try current directory as fallback
61
+ env_file = Path(".env")
62
+
63
+ if env_file.exists():
64
+ load_dotenv(env_file, override=False) # Don't override existing env vars
65
+
66
+ api_key = os.getenv("SYNTH_API_KEY")
67
+
68
+ if not api_key:
69
+ raise RuntimeError(
70
+ f"❌ SYNTH_API_KEY not found! "
71
+ f"Please set it in {env_file.resolve() if env_file.exists() else 'synth-ai/.env'}. "
72
+ f"No fallback - API key must be explicitly set."
73
+ )
74
+
75
+ return api_key
76
+
77
+
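For illustration only (not part of the diff): the loader above expects the key in a .env file at the synth-ai checkout root, with the current directory as a fallback. A minimal usage sketch, with a made-up key value:

    # .env at the synth-ai repo root (hypothetical contents):
    #   SYNTH_API_KEY=sk-synth-0123456789abcdef
    api_key = _load_synth_api_key()  # raises RuntimeError if the key is missing
    headers = {"Authorization": f"Bearer {api_key}"}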
78
+ def _find_venv_python() -> str:
79
+ """Find the venv Python executable to avoid uv cache permission issues.
80
+
81
+ Checks in order:
82
+ 1. sys.executable if already in a venv
83
+ 2. .venv/bin/python relative to current working directory
84
+ 3. .venv/bin/python relative to repo root (if synth_ai package is installed)
85
+ 4. Falls back to 'python' if venv not found
86
+ """
87
+ # If we're already running in a venv, use that
88
+ if sys.executable and ("venv" in sys.executable or ".venv" in sys.executable):
89
+ return sys.executable
90
+
91
+ # Check .venv/bin/python relative to current working directory
92
+ cwd_venv = Path.cwd() / ".venv" / "bin" / "python"
93
+ if cwd_venv.exists() and os.access(cwd_venv, os.X_OK):
94
+ return str(cwd_venv)
95
+
96
+ # Check .venv/bin/python relative to synth_ai package location
97
+ try:
98
+ import synth_ai
99
+
100
+ package_path = Path(synth_ai.__file__ or Path(__file__).resolve()).parent.parent.parent
101
+ pkg_venv = package_path / ".venv" / "bin" / "python"
102
+ if pkg_venv.exists() and os.access(pkg_venv, os.X_OK):
103
+ return str(pkg_venv)
104
+ except Exception:
105
+ pass
106
+
107
+ # Fallback to system python
108
+ return "python"
109
+
110
+
111
+ def _get_default_train_cmd() -> str:
112
+ """Get the default training command, evaluating venv path lazily.
113
+
114
+ This is called when building the command, not at module import time,
115
+ so it can properly detect the venv based on the current working directory.
116
+ """
117
+ return f"{_find_venv_python()} -m synth_ai.cli train"
118
+
119
+
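Illustrative only, not part of the diff: the default command built here can be overridden per deployment through EXPERIMENT_QUEUE_TRAIN_CMD, which _build_train_command (later in this file) reads lazily. A small sketch, where the uv-based override string is a hypothetical example:

    import os

    # Hypothetical override: run the trainer through uv instead of the detected venv python.
    os.environ["EXPERIMENT_QUEUE_TRAIN_CMD"] = "uv run python -m synth_ai.cli train"

    # Without the override, the lazily-built default resolves to something like
    # "<repo>/.venv/bin/python -m synth_ai.cli train".
    print(_get_default_train_cmd())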
120
+ def _extract_backend_job_id(output: str) -> str | None:
121
+ """Extract backend job ID from subprocess output.
122
+
123
+ Looks for patterns like:
124
+ - JSON: "job_id": "pl_xxxxx"
125
+ - Pattern: pl_[a-f0-9]+
126
+
127
+ Args:
128
+ output: Subprocess stdout/stderr output
129
+
130
+ Returns:
131
+ Backend job ID if found, None otherwise
132
+
133
+ Raises:
134
+ AssertionError: If extracted ID doesn't match expected format
135
+ """
136
+ if not output:
137
+ return None
138
+
139
+ # Assert output is a string
140
+ assert isinstance(output, str), f"Expected str, got {type(output).__name__}"
141
+
142
+ # Look for job_id in JSON response
143
+ match = re.search(r'"job_id"\s*:\s*"([^"]+)"', output)
144
+ if match:
145
+ job_id = match.group(1)
146
+ # Validate format
147
+ assert job_id.startswith("pl_"), f"Extracted job_id doesn't match expected format 'pl_*': {job_id}"
148
+ assert len(job_id) > 3, f"Extracted job_id too short: {job_id}"
149
+ return job_id
150
+
151
+ # Try pattern pl_xxxxx
152
+ match = re.search(r'pl_[a-f0-9]+', output)
153
+ if match:
154
+ job_id = match.group(0)
155
+ # Validate format
156
+ assert job_id.startswith("pl_"), f"Extracted job_id doesn't match expected format 'pl_*': {job_id}"
157
+ assert len(job_id) > 3, f"Extracted job_id too short: {job_id}"
158
+ return job_id
159
+
160
+ return None
161
+
162
+
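Illustrative only, not part of the diff: a quick sketch of how the extractor above behaves on the two output shapes it scans for (the job IDs are made up):

    # JSON-style line emitted by the CLI:
    assert _extract_backend_job_id('{"job_id": "pl_0123abcd"}') == "pl_0123abcd"

    # Bare pl_<hex> pattern embedded in log text:
    assert _extract_backend_job_id("created job pl_deadbeef, polling...") == "pl_deadbeef"

    # No recognizable ID -> None
    assert _extract_backend_job_id("no job id here") is None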
163
+ def _poll_backend_progress(
164
+ backend_job_id: str,
165
+ status_tracker: ExperimentStatusTracker,
166
+ policy: str | None,
167
+ environment: str | None,
168
+ backend_url: str,
169
+ api_key: str,
170
+ stop_event: threading.Event,
171
+ job_start_time: float | None = None,
172
+ ) -> None:
173
+ """Poll backend API for progress events and update status_json.
174
+
175
+ Polls the backend API endpoint `/prompt-learning/online/jobs/{backend_job_id}/events`
176
+ every 5 seconds to fetch `prompt.learning.progress` events containing rollouts,
177
+ ETA, and best score information. Updates the experiment status_json in real-time.
178
+
179
+ Backend URL Configuration:
180
+ - Default: Production (https://api.usesynth.ai/api)
181
+ - Local: Set EXPERIMENT_QUEUE_LOCAL=true or use --local flag (http://localhost:8000/api)
182
+ - Custom: Set EXPERIMENT_QUEUE_BACKEND_URL env var
183
+
184
+ Args:
185
+ backend_job_id: Backend job ID to poll (e.g., "pl_xxxxx")
186
+ status_tracker: ExperimentStatusTracker instance for updating status_json
187
+ policy: Policy model name (e.g., "gpt-4", "llama-3.1-8b-instant")
188
+ environment: Environment name (e.g., "heartdisease", "hotpotqa")
189
+ backend_url: Backend API base URL (from config.backend_url)
190
+ api_key: API key for authentication (from SYNTH_API_KEY env var)
191
+ stop_event: Threading event to signal when to stop polling
192
+ """
193
+ import logging
194
+ import os
195
+
196
+ import requests
197
+
198
+ # Import BackendJobEvent locally to ensure it's available in this function's scope
199
+ from .api_schemas import BackendJobEvent # noqa: F811
200
+
201
+ # Get logger for this thread (logger from parent thread may not work correctly)
202
+ poller_logger = logging.getLogger(f"synth_ai.cli.local.experiment_queue.poller.{backend_job_id}")
203
+
204
+ # Set log level from environment variable if set (allows --loglevel flag to control verbosity)
205
+ # Use Celery's logger hierarchy instead of creating our own handler to avoid duplicates
206
+ log_level_env = os.getenv("EXPERIMENT_QUEUE_LOG_LEVEL", "INFO").upper()
207
+ try:
208
+ log_level = getattr(logging, log_level_env)
209
+ poller_logger.setLevel(log_level)
210
+ # Don't create handlers - let Celery's logging handle it
211
+ # Just propagate to parent logger (Celery's task logger)
212
+ poller_logger.propagate = True
213
+ except (AttributeError, ValueError):
214
+ # Invalid log level, use default
215
+ pass
216
+
217
+ # Validate inputs with assertions
218
+ assert backend_job_id, "backend_job_id cannot be empty"
219
+ assert backend_job_id.startswith("pl_"), f"Invalid backend_job_id format: expected 'pl_*', got '{backend_job_id}'"
220
+ assert backend_url, "backend_url cannot be empty"
221
+ assert backend_url.startswith(("http://", "https://")), f"Invalid backend_url format: {backend_url}"
222
+ assert api_key, "api_key cannot be empty"
223
+ assert status_tracker is not None, "status_tracker cannot be None"
224
+ assert stop_event is not None, "stop_event cannot be None"
225
+
226
+ url = f"{backend_url.rstrip('/')}/prompt-learning/online/jobs/{backend_job_id}/events"
227
+ headers = {"Authorization": f"Bearer {api_key}"}
228
+ last_seq = 0
229
+ progress_start_time: float | None = None # Track when we first see progress
230
+ consecutive_timeouts = 0 # Track consecutive timeouts for exponential backoff
231
+ base_poll_interval = 5.0 # Base polling interval in seconds
232
+
233
+ # ✅ ADD: Track last progress update time to detect stuck jobs
234
+ last_progress_time: float | None = None
235
+ last_rollouts_completed: int | None = None
236
+ last_progress_seq = 0
237
+ stuck_threshold_seconds = 600.0 # 10 minutes without progress = stuck
238
+
239
+ poller_logger.info("📡 Starting progress poller for backend job %s (URL: %s)", backend_job_id, url)
240
+
241
+ while not stop_event.is_set():
242
+ events_received = 0
243
+ try:
244
+ # Assert URL is valid before making request
245
+ assert url.startswith(("http://", "https://")), f"Invalid URL format: {url}"
246
+
247
+ poller_logger.info("Polling backend API: %s (since_seq: %d)", url, last_seq)
248
+
249
+ try:
250
+ resp = requests.get(
251
+ url,
252
+ headers=headers,
253
+ params={"since_seq": last_seq, "limit": 100},
254
+ timeout=120, # Increased to 120s to handle slow backend/PostgREST responses
255
+ )
256
+ except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
257
+ # ✅ ADD: Detect connection pool exhaustion in poller
258
+ error_str = str(e).lower()
259
+ is_pool_exhausted = (
260
+ "connection" in error_str
261
+ or "timeout" in error_str
262
+ or "refused" in error_str
263
+ )
264
+ if is_pool_exhausted:
265
+ # 🔥 VERY LOUD ERROR MESSAGES FOR CONNECTION POOL ISSUES IN POLLER
266
+ print("=" * 100, flush=True)
267
+ print("🔥🔥🔥 CONNECTION POOL EXHAUSTION DETECTED (POLLER) 🔥🔥🔥", flush=True)
268
+ print("=" * 100, flush=True)
269
+ print(f"Backend Job ID: {backend_job_id}", flush=True)
270
+ print(f"URL: {url}", flush=True)
271
+ print(f"Error: {type(e).__name__}: {str(e)}", flush=True)
272
+ print("=" * 100, flush=True)
273
+ print("⚠️ Cannot fetch events - connection pool may be exhausted!", flush=True)
274
+ print("⚠️ Check DB_POOL_SIZE and DB_MAX_OVERFLOW environment variables", flush=True)
275
+ print("=" * 100, flush=True)
276
+
277
+ poller_logger.error("=" * 100)
278
+ poller_logger.error("🔥🔥🔥 CONNECTION POOL EXHAUSTION DETECTED (POLLER) 🔥🔥🔥")
279
+ poller_logger.error("=" * 100)
280
+ poller_logger.error("Backend Job ID: %s | URL: %s", backend_job_id, url)
281
+ poller_logger.error("Error: %s: %s", type(e).__name__, str(e))
282
+ poller_logger.error("⚠️ Cannot fetch events - connection pool may be exhausted!")
283
+ poller_logger.error("⚠️ Check DB_POOL_SIZE and DB_MAX_OVERFLOW environment variables")
284
+ poller_logger.error("=" * 100)
285
+ raise
286
+
287
+ # Assert we got a response object
288
+ assert resp is not None, "requests.get() returned None"
289
+
290
+ poller_logger.info("API response: status=%d, content_length=%d", resp.status_code, len(resp.content))
291
+
292
+ # ✅ ADD: Detect connection pool exhaustion in HTTP error responses
293
+ if resp.status_code not in (200, 201):
294
+ body_text = (resp.text or "")[:500].lower()
295
+ is_pool_exhausted = (
296
+ resp.status_code == 503 # Service Unavailable
297
+ or resp.status_code == 429 # Too Many Requests (after long wait)
298
+ or "connection pool" in body_text
299
+ or "too many clients" in body_text
300
+ or "maxclients" in body_text
301
+ or "max clients" in body_text
302
+ or "connection refused" in body_text
303
+ )
304
+
305
+ if is_pool_exhausted:
306
+ # 🔥 VERY LOUD ERROR MESSAGES FOR CONNECTION POOL ISSUES IN POLLER
307
+ print("=" * 100, flush=True)
308
+ print("🔥🔥🔥 CONNECTION POOL EXHAUSTION DETECTED (POLLER HTTP ERROR) 🔥🔥🔥", flush=True)
309
+ print("=" * 100, flush=True)
310
+ print(f"Backend Job ID: {backend_job_id}", flush=True)
311
+ print(f"URL: {url}", flush=True)
312
+ print(f"HTTP Status: {resp.status_code}", flush=True)
313
+ print(f"Response Body: {resp.text[:500]}", flush=True)
314
+ print("=" * 100, flush=True)
315
+ print("⚠️ Cannot fetch events - connection pool may be exhausted!", flush=True)
316
+ print("⚠️ Check DB_POOL_SIZE and DB_MAX_OVERFLOW environment variables", flush=True)
317
+ print("=" * 100, flush=True)
318
+
319
+ poller_logger.error("=" * 100)
320
+ poller_logger.error("🔥🔥🔥 CONNECTION POOL EXHAUSTION DETECTED (POLLER HTTP ERROR) 🔥🔥🔥")
321
+ poller_logger.error("=" * 100)
322
+ poller_logger.error("Backend Job ID: %s | URL: %s | HTTP: %d", backend_job_id, url, resp.status_code)
323
+ poller_logger.error("Response Body: %s", resp.text[:500])
324
+ poller_logger.error("⚠️ Cannot fetch events - connection pool may be exhausted!")
325
+ poller_logger.error("⚠️ Check DB_POOL_SIZE and DB_MAX_OVERFLOW environment variables")
326
+ poller_logger.error("=" * 100)
327
+
328
+ if resp.status_code == 200:
329
+ # Parse and validate API response using Pydantic models
330
+ try:
331
+ raw_data = resp.json()
332
+ # Assert response is not None
333
+ assert raw_data is not None, "API returned None response"
334
+
335
+ # Parse response with validation
336
+ assert isinstance(raw_data, dict | list), (
337
+ f"API response must be dict or list, got {type(raw_data).__name__}: {raw_data}"
338
+ )
339
+
340
+ events_response = BackendEventsResponse.parse_response(raw_data)
341
+ assert isinstance(events_response, BackendEventsResponse), (
342
+ f"parse_response returned wrong type: {type(events_response).__name__}"
343
+ )
344
+ assert isinstance(events_response.events, list), (
345
+ f"events_response.events must be list, got {type(events_response.events).__name__}"
346
+ )
347
+
348
+ events_received = len(events_response.events)
349
+ assert events_received >= 0, (
350
+ f"events_received must be >= 0, got {events_received}"
351
+ )
352
+
353
+ # Process each event
354
+ event_types_seen: dict[str, int] = {}
355
+ for idx, event in enumerate(events_response.events):
356
+ # Assert event is BackendJobEvent instance
357
+ assert isinstance(event, BackendJobEvent), (
358
+ f"Event at index {idx} must be BackendJobEvent, got {type(event).__name__}"
359
+ )
360
+ # Assert event has required fields
361
+ assert event.seq >= 0, f"Invalid seq: {event.seq}"
362
+ assert event.type, f"Event missing type field: {event}"
363
+ assert event.message, f"Event missing message field: {event}"
364
+
365
+ # Track event types for debugging
366
+ event_types_seen[event.type] = event_types_seen.get(event.type, 0) + 1
367
+
368
+ # Check if this is a progress event
369
+ if event.type == "prompt.learning.progress":
370
+ poller_logger.info(
371
+ "Found progress event seq=%d: %s",
372
+ event.seq,
373
+ event.message[:100],
374
+ )
375
+ # Extract progress data with validation
376
+ progress_data = event.get_progress_data()
377
+ if progress_data is None:
378
+ poller_logger.warning(
379
+ "Progress event seq=%d has no parseable data. Event data: %s",
380
+ event.seq,
381
+ event.data,
382
+ )
383
+ continue
384
+
385
+ poller_logger.debug(
386
+ "Progress event seq=%d data: rollouts_completed=%s, rollouts_total=%s, best_score=%s, eta=%s",
387
+ event.seq,
388
+ progress_data.rollouts_completed,
389
+ progress_data.effective_rollouts_total,
390
+ progress_data.effective_best_score,
391
+ progress_data.eta_seconds,
392
+ )
393
+
394
+ # Use effective getters that handle field name variations
395
+ rollouts_completed = progress_data.rollouts_completed
396
+ rollouts_total = progress_data.effective_rollouts_total
397
+ eta_seconds = progress_data.eta_seconds
398
+ # percent_rollouts from backend is 0-1, convert to 0-100 for display
399
+ progress_pct = None
400
+ if progress_data.percent_rollouts is not None:
401
+ progress_pct = progress_data.percent_rollouts * 100.0
402
+ elif progress_data.percent_overall is not None:
403
+ # Fallback to percent_overall if percent_rollouts not available
404
+ progress_pct = progress_data.percent_overall * 100.0
405
+ best_score = progress_data.effective_best_score
406
+
407
+ # Track when we first see progress (for rollouts/min calculation)
408
+ if rollouts_completed is not None and rollouts_completed > 0 and progress_start_time is None:
409
+ progress_start_time = time.time()
410
+
411
+ # Calculate rollouts/min if we have progress and timing info
412
+ rollouts_per_minute = None
413
+ if rollouts_completed is not None and rollouts_completed > 0:
414
+ # Use progress_start_time if available, otherwise fall back to job_start_time
415
+ start_time_for_rate = progress_start_time or job_start_time
416
+ if start_time_for_rate is not None:
417
+ elapsed = time.time() - start_time_for_rate
418
+ if elapsed > 0:
419
+ rate_per_second = rollouts_completed / elapsed
420
+ rollouts_per_minute = rate_per_second * 60.0
421
+
422
+ # Assert data types and ranges
423
+ if rollouts_completed is not None:
424
+ assert isinstance(rollouts_completed, int), (
425
+ f"rollouts_completed must be int, got {type(rollouts_completed).__name__}: {rollouts_completed}"
426
+ )
427
+ assert rollouts_completed >= 0, (
428
+ f"rollouts_completed must be >= 0, got {rollouts_completed}"
429
+ )
430
+
431
+ if rollouts_total is not None:
432
+ assert isinstance(rollouts_total, int), (
433
+ f"rollouts_total must be int, got {type(rollouts_total).__name__}: {rollouts_total}"
434
+ )
435
+ assert rollouts_total > 0, (
436
+ f"rollouts_total must be > 0, got {rollouts_total}"
437
+ )
438
+
439
+ if eta_seconds is not None:
440
+ assert isinstance(eta_seconds, int | float), (
441
+ f"eta_seconds must be int | float, got {type(eta_seconds).__name__}: {eta_seconds}"
442
+ )
443
+ assert eta_seconds >= 0, (
444
+ f"eta_seconds must be >= 0, got {eta_seconds}"
445
+ )
446
+
447
+ if best_score is not None:
448
+ assert isinstance(best_score, int | float), (
449
+ f"best_score must be int | float, got {type(best_score).__name__}: {best_score}"
450
+ )
451
+ assert 0 <= best_score <= 1, (
452
+ f"best_score must be in [0, 1], got {best_score}"
453
+ )
454
+
455
+ if progress_pct is not None:
456
+ assert isinstance(progress_pct, int | float), (
457
+ f"progress_pct must be int | float, got {type(progress_pct).__name__}: {progress_pct}"
458
+ )
459
+ assert 0 <= progress_pct <= 100, (
460
+ f"progress_pct must be in [0, 100], got {progress_pct}"
461
+ )
462
+
463
+ # Assert consistency: rollouts_completed <= rollouts_total
464
+ if rollouts_completed is not None and rollouts_total is not None:
465
+ assert rollouts_completed <= rollouts_total, (
466
+ f"rollouts_completed ({rollouts_completed}) > rollouts_total ({rollouts_total})"
467
+ )
468
+
469
+ # Assert we have meaningful progress data
470
+ has_progress = (
471
+ rollouts_completed is not None
472
+ or best_score is not None
473
+ or rollouts_total is not None
474
+ )
475
+
476
+ # ✅ Initialize custom_fields before use (extract from event data for validation phase tracking)
477
+ custom_fields: dict[str, Any] = {}
478
+ if event.data and isinstance(event.data, dict):
479
+ # Extract phase and validation info if present
480
+ phase = event.data.get("phase")
481
+ if phase == "validation":
482
+ custom_fields["phase"] = "validation"
483
+ if "validation_candidate" in event.data:
484
+ custom_fields["validation_candidate"] = event.data["validation_candidate"]
485
+ if "validation_total" in event.data:
486
+ custom_fields["validation_total"] = event.data["validation_total"]
487
+
488
+ if has_progress:
489
+ # Validate status_tracker before update
490
+ assert status_tracker is not None, "status_tracker is None"
491
+ assert hasattr(status_tracker, "update"), "status_tracker missing update method"
492
+ assert hasattr(status_tracker, "job_id"), "status_tracker missing job_id"
493
+
494
+ status_tracker.update(
495
+ policy=policy,
496
+ environment=environment,
497
+ rollouts_completed=rollouts_completed,
498
+ total_rollouts=rollouts_total,
499
+ eta_seconds=eta_seconds,
500
+ progress_pct=progress_pct,
501
+ best_score=best_score,
502
+ rollouts_per_minute=rollouts_per_minute,
503
+ custom_fields=custom_fields if custom_fields else None,
504
+ )
505
+
506
+ # ✅ ADD: Track progress for stuck detection
507
+ import time as _time_module
508
+ current_time = _time_module.time()
509
+ if rollouts_completed is not None:
510
+ if last_rollouts_completed is None or rollouts_completed != last_rollouts_completed:
511
+ # Progress changed - update tracking
512
+ last_progress_time = current_time
513
+ last_rollouts_completed = rollouts_completed
514
+ last_progress_seq = event.seq
515
+ poller_logger.info(
516
+ "📊 Progress update for job %s: %s/%s rollouts, ETA: %s, Best: %s",
517
+ backend_job_id,
518
+ rollouts_completed,
519
+ rollouts_total,
520
+ eta_seconds,
521
+ best_score,
522
+ )
523
+ elif last_progress_time is not None:
524
+ # Check if stuck (no progress for threshold time)
525
+ time_since_progress = current_time - last_progress_time
526
+ if time_since_progress >= stuck_threshold_seconds:
527
+ poller_logger.warning(
528
+ "⚠️ Job %s appears STUCK: No progress for %.1f minutes (last: %s/%s rollouts at seq %d)",
529
+ backend_job_id,
530
+ time_since_progress / 60.0,
531
+ last_rollouts_completed,
532
+ rollouts_total,
533
+ last_progress_seq,
534
+ )
535
+ # Emit warning event
536
+ with contextlib.suppress(Exception):
537
+ status_tracker.update(
538
+ custom_fields={
539
+ **(custom_fields or {}),
540
+ "stuck_warning": True,
541
+ "time_since_progress_seconds": time_since_progress,
542
+ }
543
+ )
544
+ else:
545
+ # No rollouts info - log anyway
546
+ poller_logger.info(
547
+ "📊 Progress update for job %s: %s/%s rollouts, ETA: %s, Best: %s",
548
+ backend_job_id,
549
+ rollouts_completed,
550
+ rollouts_total,
551
+ eta_seconds,
552
+ best_score,
553
+ )
554
+
555
+ # Update last_seq (always update, even if no progress data)
556
+ last_seq = max(last_seq, event.seq)
557
+ else:
558
+ # Non-progress event - just update seq
559
+ last_seq = max(last_seq, event.seq)
560
+
561
+ # ✅ ADD: Track consecutive polls with no new events
562
+ if events_received == 0:
563
+ # Increment counter for no-event polls
564
+ if not hasattr(_poll_backend_progress, '_no_event_polls'):
565
+ _poll_backend_progress._no_event_polls = {} # type: ignore[attr-defined]
566
+ if backend_job_id not in _poll_backend_progress._no_event_polls: # type: ignore[attr-defined]
567
+ _poll_backend_progress._no_event_polls[backend_job_id] = 0 # type: ignore[attr-defined]
568
+ _poll_backend_progress._no_event_polls[backend_job_id] += 1 # type: ignore[attr-defined]
569
+ no_event_count = _poll_backend_progress._no_event_polls[backend_job_id] # type: ignore[attr-defined]
570
+
571
+ # Warn if we've had many consecutive polls with no events
572
+ if no_event_count >= 12: # 12 polls * 5s = 60s with no events
573
+ poller_logger.warning(
574
+ "⚠️ Job %s: No new events for %d consecutive polls (~%ds). Last seq: %d. Job may be stuck.",
575
+ backend_job_id,
576
+ no_event_count,
577
+ no_event_count * int(base_poll_interval),
578
+ last_seq,
579
+ )
580
+ # Emit warning in status_json
581
+ with contextlib.suppress(Exception):
582
+ status_tracker.update(
583
+ custom_fields={
584
+ "no_event_polls": no_event_count,
585
+ "last_event_seq": last_seq,
586
+ "stuck_warning": True,
587
+ }
588
+ )
589
+
590
+ poller_logger.info("Progress poller heartbeat for job %s (no new events, last_seq=%d, consecutive_no_events=%d)", backend_job_id, last_seq, no_event_count)
591
+ else:
592
+ # Reset counter when we get events
593
+ if hasattr(_poll_backend_progress, '_no_event_polls') and backend_job_id in _poll_backend_progress._no_event_polls: # type: ignore[attr-defined]
594
+ _poll_backend_progress._no_event_polls[backend_job_id] = 0 # type: ignore[attr-defined]
595
+
596
+ event_types_str = ", ".join(f"{k}:{v}" for k, v in sorted(event_types_seen.items()))
597
+ poller_logger.info(
598
+ "Processed %d events (types: %s), updated last_seq to %d",
599
+ events_received,
600
+ event_types_str,
601
+ last_seq,
602
+ )
603
+ # Log if we're not seeing progress events
604
+ if "prompt.learning.progress" not in event_types_seen:
605
+ poller_logger.debug(
606
+ "No progress events in this batch (last_seq=%d). Event types seen: %s",
607
+ last_seq,
608
+ event_types_str,
609
+ )
610
+
611
+ # Reset timeout counter on successful request
612
+ consecutive_timeouts = 0
613
+
614
+ except AssertionError as e:
615
+ poller_logger.error(
616
+ "❌ Assertion failed while parsing events for job %s: %s. Response: %s",
617
+ backend_job_id,
618
+ e,
619
+ resp.text[:500] if resp else "No response",
620
+ )
621
+ # Continue polling - don't stop on validation errors
622
+ except ValueError as e:
623
+ poller_logger.error(
624
+ "❌ Invalid API response format for job %s: %s. Response: %s",
625
+ backend_job_id,
626
+ e,
627
+ resp.text[:500] if resp else "No response",
628
+ )
629
+ # Continue polling - don't stop on validation errors
630
+ except Exception as e:
631
+ poller_logger.error(
632
+ "❌ Unexpected error parsing events for job %s: %s. Response: %s",
633
+ backend_job_id,
634
+ e,
635
+ resp.text[:500] if resp else "No response",
636
+ exc_info=True,
637
+ )
638
+ # Continue polling - don't stop on parsing errors
639
+ elif resp.status_code == 404:
640
+ # Job not found yet or doesn't exist - stop polling
641
+ poller_logger.warning("Backend job %s not found (404), stopping poller", backend_job_id)
642
+ break
643
+ elif resp.status_code != 200:
644
+ poller_logger.warning(
645
+ "Backend API returned status %d for job %s: %s",
646
+ resp.status_code,
647
+ backend_job_id,
648
+ resp.text[:200],
649
+ )
650
+ except requests.exceptions.ReadTimeout as e:
651
+ # ReadTimeout is expected when backend is slow - log as warning and use exponential backoff
652
+ consecutive_timeouts += 1
653
+ backoff_seconds = min(base_poll_interval * (2 ** min(consecutive_timeouts - 1, 4)), 60.0) # Max 60s backoff
654
+ poller_logger.warning(
655
+ "Backend timeout polling job %s (consecutive=%d, backing off %.1fs): %s",
656
+ backend_job_id,
657
+ consecutive_timeouts,
658
+ backoff_seconds,
659
+ e,
660
+ )
661
+ # Use exponential backoff on timeout
662
+ stop_event.wait(timeout=backoff_seconds)
663
+ continue
664
+ except requests.exceptions.RequestException as e:
665
+ # Other network errors - log as warning, reset timeout counter
666
+ consecutive_timeouts = 0
667
+ poller_logger.warning("Network error polling job %s: %s", backend_job_id, e)
668
+ except Exception as e:
669
+ # Unexpected errors - log as error but don't crash
670
+ consecutive_timeouts = 0
671
+ poller_logger.error("Progress poller error for job %s: %s", backend_job_id, e, exc_info=True)
672
+
673
+ # Poll every 5 seconds (or after backoff)
674
+ stop_event.wait(timeout=base_poll_interval)
675
+
676
+ poller_logger.info("📡 Stopped progress poller for backend job %s", backend_job_id)
677
+
678
+
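Illustrative only, not part of the diff: the poller above is meant to run on a daemon thread and be stopped with an Event; on repeated read timeouts it backs off 5s, 10s, 20s, 40s and then caps at 60s. Roughly how the task body later in this file wires it up (backend_job_id, status_tracker, api_key and the model/environment names below are stand-ins for values resolved at runtime):

    import threading
    import time

    stop_event = threading.Event()
    poller = threading.Thread(
        target=_poll_backend_progress,
        kwargs={
            "backend_job_id": "pl_0123abcd",             # hypothetical backend job ID
            "status_tracker": status_tracker,            # ExperimentStatusTracker for this job
            "policy": "gpt-4o-mini",                     # hypothetical policy model
            "environment": "heartdisease",
            "backend_url": "https://api.usesynth.ai/api",
            "api_key": api_key,
            "stop_event": stop_event,
            "job_start_time": time.time(),
        },
        daemon=True,
    )
    poller.start()
    # ... run the training subprocess, then shut the poller down ...
    stop_event.set()
    poller.join(timeout=10)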
679
+ def _truncate(text: str, limit: int = 4000) -> str:
680
+ """Truncate text to a maximum length, keeping the end portion.
681
+
682
+ Args:
683
+ text: Text to truncate
684
+ limit: Maximum length in characters (default: 4000)
685
+
686
+ Returns:
687
+ Truncated text (last `limit` characters if text exceeds limit)
688
+ """
689
+ if len(text) <= limit:
690
+ return text
691
+ return text[-limit:]
692
+
693
+
694
+ def _build_train_command(config_path: str) -> list[str]:
695
+ """Build the training command for running a prompt learning job.
696
+
697
+ Constructs a command list suitable for subprocess execution by:
698
+ 1. Getting the base command from EXPERIMENT_QUEUE_TRAIN_CMD env var or default
699
+ 2. Parsing the base command into segments
700
+ 3. Appending prompt learning specific flags (--type, --config, --poll, etc.)
701
+ 4. Adding --backend flag with URL from experiment queue config
702
+
703
+ Args:
704
+ config_path: Path to the TOML config file for the experiment
705
+
706
+ Returns:
707
+ List of command segments ready for subprocess execution
708
+
709
+ Note:
710
+ The base command defaults to `python -m synth_ai.cli train` if
711
+ EXPERIMENT_QUEUE_TRAIN_CMD is not set. The command always includes
712
+ --type prompt_learning, --config, --poll, --stream-format cli, and --backend flags.
713
+ """
714
+ # Get command from env var or use default (lazily evaluated)
715
+ base_cmd = os.getenv(TRAIN_COMMAND_ENV)
716
+ if base_cmd:
717
+ logger.debug("Using training command from EXPERIMENT_QUEUE_TRAIN_CMD: %s", base_cmd)
718
+ else:
719
+ base_cmd = _get_default_train_cmd()
720
+ logger.debug("Using default training command: %s", base_cmd)
721
+
722
+ segments: list[str] = []
723
+ for part in shlex.split(base_cmd):
724
+ if part:
725
+ segments.append(part)
726
+
727
+ # Get backend URL from config and add --backend flag
728
+ config = load_config()
729
+ backend_url = config.backend_url
730
+
731
+ segments.extend(
732
+ [
733
+ "--type",
734
+ "prompt_learning",
735
+ "--config",
736
+ config_path,
737
+ "--backend",
738
+ backend_url,
739
+ "--poll",
740
+ "--stream-format",
741
+ "cli",
742
+ ]
743
+ )
744
+ return segments
745
+
746
+
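Illustrative only, not part of the diff: with no EXPERIMENT_QUEUE_TRAIN_CMD override and the production backend URL, the list returned above would look roughly like this (paths are hypothetical):

    cmd = _build_train_command("/tmp/experiment_queue/job_123/config.toml")
    # e.g. ['/path/to/synth-ai/.venv/bin/python', '-m', 'synth_ai.cli', 'train',
    #       '--type', 'prompt_learning',
    #       '--config', '/tmp/experiment_queue/job_123/config.toml',
    #       '--backend', 'https://api.usesynth.ai/api',
    #       '--poll', '--stream-format', 'cli']
    result = subprocess.run(cmd, capture_output=True, text=True)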
747
+ def _mark_job_running(job_id: str, task_id: str | None) -> ExperimentJob | None:
748
+ """Mark a job as running and update its status in the database.
749
+
750
+ Updates the job status to RUNNING, sets the started_at timestamp, and
751
+ optionally associates a Celery task ID. If the parent experiment is
752
+ QUEUED, it is also marked as RUNNING.
753
+
754
+ Args:
755
+ job_id: Job identifier
756
+ task_id: Optional Celery task ID to associate with the job
757
+
758
+ Returns:
759
+ ExperimentJob instance if found, None otherwise
760
+
761
+ Note:
762
+ The job is expunged from the session so it can be safely used outside
763
+ the session scope. The session is committed automatically by session_scope.
764
+ """
765
+ with session_scope() as session:
766
+ job = session.get(ExperimentJob, job_id)
767
+ if not job:
768
+ logger.warning("Job %s missing from database", job_id)
769
+ return None
770
+ job.status = ExperimentJobStatus.RUNNING
771
+ job.started_at = datetime.now(UTC)
772
+ if task_id:
773
+ job.celery_task_id = task_id
774
+ experiment = job.experiment
775
+ if experiment and experiment.status == ExperimentStatus.QUEUED:
776
+ experiment.status = ExperimentStatus.RUNNING
777
+ experiment.started_at = datetime.now(UTC)
778
+ session.flush()
779
+ # Expunge so job can be safely used outside session scope
780
+ session.expunge(job)
781
+ return job
782
+
783
+
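Illustrative only, not part of the diff: because the job is expunged before the session closes, the caller can keep reading its plain column attributes after commit, while attributes that were not loaded before the expunge can raise SQLAlchemy's DetachedInstanceError. A minimal consumer sketch with made-up IDs:

    job = _mark_job_running("job_123", task_id="celery-task-uuid")
    if job is not None:
        # Column attributes were loaded before expunge, so these reads are safe.
        print(job.job_id, job.status, job.started_at)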
784
+ def _jobs_remaining(session, experiment_id: str) -> int:
785
+ """Count remaining jobs (QUEUED or RUNNING) for an experiment.
786
+
787
+ Args:
788
+ session: SQLAlchemy session
789
+ experiment_id: Experiment identifier
790
+
791
+ Returns:
792
+ Number of jobs that are still QUEUED or RUNNING (not completed/failed)
793
+ """
794
+ return (
795
+ session.query(ExperimentJob)
796
+ .filter(
797
+ ExperimentJob.experiment_id == experiment_id,
798
+ ExperimentJob.status.in_(
799
+ [
800
+ ExperimentJobStatus.QUEUED,
801
+ ExperimentJobStatus.RUNNING,
802
+ ]
803
+ ),
804
+ )
805
+ .count()
806
+ )
807
+
808
+
809
+ def _finalize_job(
810
+ job_id: str,
811
+ *,
812
+ summary: ResultSummary,
813
+ success: bool,
814
+ error_message: str | None = None,
815
+ command: str | None = None,
816
+ working_directory: str | None = None,
817
+ python_executable: str | None = None,
818
+ environment_keys: list[str] | None = None,
819
+ ) -> dict[str, Any] | None:
820
+ """Finalize a job by updating its status and persisting results.
821
+
822
+ Updates the job status to COMPLETED or FAILED based on success flag,
823
+ persists trial data if successful, and updates experiment status when
824
+ all jobs are done. If the experiment has remaining jobs, dispatches them.
825
+
826
+ Args:
827
+ job_id: Job identifier
828
+ summary: Result summary containing stdout, stderr, metrics, etc.
829
+ success: Whether the job completed successfully
830
+ error_message: Optional error message if job failed
831
+
832
+ Returns:
833
+ Summary dictionary if job found, None otherwise
834
+
835
+ Note:
836
+ - If successful: Job status set to COMPLETED, trials persisted
837
+ - If failed: Job status set to FAILED, error message stored
838
+ - Experiment status updated to COMPLETED/FAILED only when all jobs done
839
+ - Remaining jobs are dispatched if experiment still has queued jobs
840
+ """
841
+ with session_scope() as session:
842
+ job = session.get(ExperimentJob, job_id)
843
+ if not job:
844
+ logger.warning("Job %s missing during finalize", job_id)
845
+ return None
846
+
847
+ job.completed_at = datetime.now(UTC)
848
+ experiment = job.experiment
849
+
850
+ # ALWAYS create execution log entry (for both success and failure)
851
+ # This allows querying failures directly from the database
852
+ if command is not None and working_directory is not None:
853
+ from uuid import uuid4
854
+ # For failed jobs, store full stdout/stderr (up to 100k chars each)
855
+ # For successful jobs, truncate to 4k chars to save space
856
+ stdout_for_log = summary.stdout or ""
857
+ stderr_for_log = summary.stderr or ""
858
+ if not success:
859
+ # Keep full output for errors (truncate only if extremely large)
860
+ if len(stdout_for_log) > 100000:
861
+ stdout_for_log = f"{stdout_for_log[:50000]}\n\n... (truncated {len(stdout_for_log) - 100000} chars) ...\n\n{stdout_for_log[-50000:]}"
862
+ if len(stderr_for_log) > 100000:
863
+ stderr_for_log = f"{stderr_for_log[:50000]}\n\n... (truncated {len(stderr_for_log) - 100000} chars) ...\n\n{stderr_for_log[-50000:]}"
864
+ else:
865
+ # Truncate successful job output to save space
866
+ stdout_for_log = _truncate(stdout_for_log)
867
+ stderr_for_log = _truncate(stderr_for_log)
868
+
869
+ execution_log = JobExecutionLog(
870
+ log_id=f"log_{uuid4().hex[:12]}",
871
+ job_id=job_id,
872
+ command=command,
873
+ working_directory=working_directory,
874
+ returncode=summary.returncode,
875
+ stdout=stdout_for_log,
876
+ stderr=stderr_for_log,
877
+ python_executable=python_executable,
878
+ environment_keys=environment_keys,
879
+ )
880
+ session.add(execution_log)
881
+ logger.info(
882
+ "Created execution log for job %s: returncode=%d, stdout_len=%d (stored: %d), stderr_len=%d (stored: %d)%s",
883
+ job_id,
884
+ summary.returncode,
885
+ len(summary.stdout or ""),
886
+ len(stdout_for_log),
887
+ len(summary.stderr or ""),
888
+ len(stderr_for_log),
889
+ " [FULL ERROR STORED]" if not success else "",
890
+ )
891
+
892
+ if success:
893
+ # Only set job.result for successful jobs to prevent stale data from previous runs
894
+ job.result = summary.to_dict()
895
+ job.status = ExperimentJobStatus.COMPLETED
896
+ persist_trials_from_summary(session, job, summary)
897
+ if experiment:
898
+ update_experiment_metadata(experiment, summary)
899
+
900
+ # ✅ ADD: Update status_json with final stats from backend job metadata
901
+ if job.backend_job_id:
902
+ try:
903
+ import requests
904
+
905
+ from .service import update_job_status
906
+
907
+ # Fetch backend job metadata
908
+ config = load_config()
909
+ backend_url = config.backend_url
910
+ # Load API key from .env - fail loudly if not found
911
+ try:
912
+ api_key = _load_synth_api_key()
913
+ except RuntimeError as e:
914
+ logger.error(str(e))
915
+ raise
916
+
917
+ if backend_url and api_key:
918
+ url = f"{backend_url.rstrip('/')}/prompt-learning/online/jobs/{job.backend_job_id}"
919
+ headers = {"Authorization": f"Bearer {api_key}"}
920
+ resp = requests.get(url, headers=headers, timeout=60.0) # Increased from 10s to 60s to handle backend overload
921
+
922
+ if resp.status_code == 200:
923
+ backend_job = resp.json()
924
+ backend_metadata = backend_job.get("metadata", {})
925
+ backend_stats = backend_metadata.get("stats", {})
926
+
927
+ if backend_stats:
928
+ # Update status_json with final stats (including scores for result extraction)
929
+ status_update = {
930
+ "trials_tried": backend_stats.get("trials_tried"),
931
+ "total_tokens": backend_stats.get("total_tokens"),
932
+ "total_rollouts": backend_stats.get("total_rollouts"),
933
+ "optimization_rollouts_executed": backend_stats.get("optimization_rollouts_executed"),
934
+ "validation_rollouts_executed": backend_stats.get("validation_rollouts_executed"),
935
+ "optimization_trials_evaluated": backend_stats.get("optimization_trials_evaluated"),
936
+ "validation_trials_evaluated": backend_stats.get("validation_trials_evaluated"),
937
+ # CRITICAL: Store scores for result extraction (if backend job returns 404 later)
938
+ "baseline_score": backend_stats.get("baseline_score"),
939
+ "best_score": backend_stats.get("best_score") or backend_stats.get("best_validation_score"),
940
+ "total_time_seconds": backend_stats.get("total_time_seconds"),
941
+ "eval_seeds_n": backend_stats.get("eval_seeds_n"),
942
+ "transformations_evaluated": backend_stats.get("transformations_evaluated"),
943
+ }
944
+ # Remove None values
945
+ status_update = {k: v for k, v in status_update.items() if v is not None}
946
+ # ✅ ADD: Assertion to ensure we have at least some stats
947
+ assert len(status_update) > 0, f"status_update must not be empty for job {job_id}"
948
+ if status_update:
949
+ update_job_status(job_id, status_update)
950
+ logger.info(
951
+ "Updated status_json with final stats for job %s: %s",
952
+ job_id,
953
+ status_update,
954
+ )
955
+ except Exception as e:
956
+ # Log but don't fail job finalization if stats update fails
957
+ logger.warning(
958
+ "Failed to update status_json with final stats for job %s: %s",
959
+ job_id,
960
+ e,
961
+ )
962
+ else:
963
+ # Job failed - clear job.result to prevent stale data from previous successful runs
964
+ job.result = None
965
+ job.status = ExperimentJobStatus.FAILED
966
+ # Store full error message (truncate to 100k chars max to avoid DB issues, but keep full context)
967
+ full_error = error_message or summary.stderr or "Job failed"
968
+ if len(full_error) > 100000:
969
+ # Keep first 50k and last 50k chars
970
+ full_error = f"{full_error[:50000]}\n\n... (truncated {len(full_error) - 100000} chars) ...\n\n{full_error[-50000:]}"
971
+ job.error = full_error
972
+ if experiment:
973
+ # Don't immediately mark experiment as failed - let remaining jobs continue
974
+ # The experiment will be marked as failed only if all jobs fail
975
+ logger.warning(
976
+ "Job %s failed for experiment %s, but allowing remaining jobs to continue",
977
+ job_id,
978
+ experiment.experiment_id,
979
+ )
980
+
981
+ session.flush()
982
+
983
+ if experiment:
984
+ remaining = _jobs_remaining(session, experiment.experiment_id)
985
+ if remaining == 0:
986
+ # All jobs completed - check if experiment succeeded or failed
987
+ all_jobs = (
988
+ session.query(ExperimentJob)
989
+ .filter(ExperimentJob.experiment_id == experiment.experiment_id)
990
+ .all()
991
+ )
992
+ all_failed = all(
993
+ job.status == ExperimentJobStatus.FAILED for job in all_jobs
994
+ )
995
+ if all_failed:
996
+ experiment.status = ExperimentStatus.FAILED
997
+ experiment.error = (
998
+ all_jobs[0].error if all_jobs else "All jobs failed"
999
+ )
1000
+ else:
1001
+ experiment.status = ExperimentStatus.COMPLETED
1002
+ experiment.completed_at = datetime.now(UTC)
1003
+ else:
1004
+ # Dispatch remaining jobs (periodic task will also handle this as backup)
1005
+ dispatch_available_jobs(session, experiment.experiment_id)
1006
+
1007
+ return summary.to_dict()
1008
+
1009
+
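Illustrative only, not part of the diff: for failed jobs the finalizer above keeps the head and tail of very long stdout/stderr instead of only the tail; the pattern it applies is equivalent to this small hypothetical helper (same thresholds as the code: 100k total, 50k head + 50k tail):

    def _keep_head_and_tail(text: str, limit: int = 100_000, keep: int = 50_000) -> str:
        """Return text unchanged if short enough, else the first/last `keep` chars with a marker."""
        if len(text) <= limit:
            return text
        dropped = len(text) - limit
        return f"{text[:keep]}\n\n... (truncated {dropped} chars) ...\n\n{text[-keep:]}"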
1010
+ @celery_app.task(bind=True, name="synth_ai.cli.local.experiment_queue.run_experiment_job")
1011
+ def run_experiment_job(self, job_id: str) -> dict[str, Any] | None:
1012
+ """Celery task entrypoint for running a prompt learning experiment job.
1013
+
1014
+ This is the main Celery task that executes prompt learning jobs. It:
1015
+ 1. Marks the job as RUNNING
1016
+ 2. Prepares the config file (applies overrides)
1017
+ 3. Builds and executes the training command via subprocess
1018
+ 4. Collects results (stdout, stderr, metrics, artifacts)
1019
+ 5. Finalizes the job (updates status, persists results)
1020
+
1021
+ Args:
1022
+ self: Celery task instance (bound task)
1023
+ job_id: Job identifier from the experiment queue database
1024
+
1025
+ Returns:
1026
+ Result summary dictionary if successful, None if job not found
1027
+
1028
+ Raises:
1029
+ AssertionError: If inputs are invalid (should not happen in production)
1030
+
1031
+ Note:
1032
+ The task runs the training command (`synth-ai train --type prompt_learning`)
1033
+ as a subprocess and captures stdout/stderr. Health check failures and
1034
+ authentication errors are detected and cause job failure even if returncode is 0.
1035
+ """
1036
+ # Validate input
1037
+ assert isinstance(job_id, str), (
1038
+ f"job_id must be str, got {type(job_id).__name__}: {job_id}"
1039
+ )
1040
+ assert job_id, "job_id cannot be empty"
1041
+
1042
+ job = _mark_job_running(job_id, getattr(self.request, "id", None))
1043
+ if not job:
1044
+ logger.warning("Job %s not found or could not be marked as running", job_id)
1045
+ return None
1046
+
1047
+ # Validate job object
1048
+ assert isinstance(job, ExperimentJob), (
1049
+ f"_mark_job_running must return ExperimentJob, got {type(job).__name__}"
1050
+ )
1051
+ assert job.job_id == job_id, (
1052
+ f"Job ID mismatch: expected {job_id}, got {job.job_id}"
1053
+ )
1054
+ assert job.status == ExperimentJobStatus.RUNNING, (
1055
+ f"Job status must be RUNNING, got {job.status}"
1056
+ )
1057
+
1058
+ summary = ResultSummary()
1059
+ prepared: PreparedConfig | None = None
1060
+ success = False
1061
+ error_message: str | None = None # Will be set if training fails
1062
+ cmd: list[str] | None = None # Store command for execution logging
1063
+ env: dict[str, str] | None = None # Store environment for execution logging
1064
+
1065
+ # Initialize status tracker
1066
+ assert job.job_id, "job.job_id cannot be empty"
1067
+ status_tracker = ExperimentStatusTracker(job.job_id)
1068
+ assert status_tracker.job_id == job.job_id, (
1069
+ f"Status tracker job_id mismatch: expected {job.job_id}, got {status_tracker.job_id}"
1070
+ )
1071
+
1072
+ job_start_time = time.time()
1073
+ assert job_start_time > 0, f"job_start_time must be > 0, got {job_start_time}"
1074
+
1075
+ policy: str | None = None
1076
+ environment: str | None = None
1077
+
1078
+ try:
1079
+ # Validate config_path
1080
+ assert job.config_path, "job.config_path cannot be empty"
1081
+ assert isinstance(job.config_path, str), (
1082
+ f"job.config_path must be str, got {type(job.config_path).__name__}"
1083
+ )
1084
+
1085
+ # Validate config_overrides
1086
+ if job.config_overrides is not None:
1087
+ assert isinstance(job.config_overrides, dict), (
1088
+ f"job.config_overrides must be dict, got {type(job.config_overrides).__name__}"
1089
+ )
1090
+
1091
+ prepared = prepare_config_file(job.config_path, job.config_overrides or {})
1092
+ assert prepared is not None, "prepare_config_file returned None"
1093
+ assert isinstance(prepared, PreparedConfig), (
1094
+ f"prepare_config_file must return PreparedConfig, got {type(prepared).__name__}"
1095
+ )
1096
+ assert prepared.path.exists(), (
1097
+ f"Prepared config file must exist: {prepared.path}"
1098
+ )
1099
+
1100
+ # Extract policy and environment from config
1101
+ policy, environment = extract_config_info(prepared.path)
1102
+ assert isinstance(policy, str | type(None)), (
1103
+ f"policy must be str | None, got {type(policy).__name__}: {policy}"
1104
+ )
1105
+ assert isinstance(environment, str | type(None)), (
1106
+ f"environment must be str | None, got {type(environment).__name__}: {environment}"
1107
+ )
1108
+
1109
+ # Extract model/provider from override FIRST (override takes precedence)
1110
+ model_override = None
1111
+ provider_override = None
1112
+ if job.config_overrides:
1113
+ model_override = job.config_overrides.get("prompt_learning.policy.model")
1114
+ provider_override = job.config_overrides.get("prompt_learning.policy.provider")
1115
+
1116
+ # Use override if available, otherwise use extracted
1117
+ final_model = model_override or policy
1118
+ final_provider = provider_override
1119
+
1120
+ # ASSERT: Verify overrides were applied by checking the prepared config
1121
+ if job.config_overrides:
1122
+ rollout_budget_override = job.config_overrides.get("prompt_learning.gepa.rollout.budget")
1123
+ max_rollouts_override = job.config_overrides.get("prompt_learning.termination_config.max_rollouts")
1124
+
1125
+ # Assert model override matches extracted policy
1126
+ if model_override:
1127
+ assert policy == model_override, (
1128
+ f"CRITICAL: Policy model mismatch for job {job.job_id}: "
1129
+ f"override={model_override!r} but extracted={policy!r}. "
1130
+ f"This indicates the override wasn't applied correctly to the prepared config. "
1131
+ f"Config path: {prepared.path}"
1132
+ )
1133
+ logger.info(
1134
+ "✅ Config override verified for job %s: model=%s matches extracted policy",
1135
+ job.job_id,
1136
+ model_override,
1137
+ )
1138
+
1139
+ # Assert provider override if specified
1140
+ if provider_override:
1141
+ # Extract provider from prepared config
1142
+ import tomllib
1143
+ with open(prepared.path, "rb") as f:
1144
+ prepared_config = tomllib.load(f)
1145
+ pl_section = prepared_config.get("prompt_learning", {})
1146
+ policy_section = pl_section.get("policy", {})
1147
+ extracted_provider = policy_section.get("provider") if isinstance(policy_section, dict) else None
1148
+ if extracted_provider:
1149
+ assert extracted_provider == provider_override, (
1150
+ f"CRITICAL: Provider mismatch for job {job.job_id}: "
1151
+ f"override={provider_override!r} but extracted={extracted_provider!r}. "
1152
+ f"Config path: {prepared.path}"
1153
+ )
1154
+
1155
+ # Assert rollout budget override if specified
1156
+ if rollout_budget_override is not None:
1157
+ import tomllib
1158
+ with open(prepared.path, "rb") as f:
1159
+ prepared_config = tomllib.load(f)
1160
+ pl_section = prepared_config.get("prompt_learning", {})
1161
+ gepa_section = pl_section.get("gepa", {})
1162
+ rollout_section = gepa_section.get("rollout", {}) if isinstance(gepa_section, dict) else {}
1163
+ extracted_budget = rollout_section.get("budget") if isinstance(rollout_section, dict) else None
1164
+ if extracted_budget is not None:
1165
+ assert extracted_budget == rollout_budget_override, (
1166
+ f"CRITICAL: Rollout budget mismatch for job {job.job_id}: "
1167
+ f"override={rollout_budget_override} but extracted={extracted_budget}. "
1168
+ f"Config path: {prepared.path}"
1169
+ )
1170
+
1171
+ # Assert max_rollouts override if specified
1172
+ if max_rollouts_override is not None:
1173
+ import tomllib
1174
+ with open(prepared.path, "rb") as f:
1175
+ prepared_config = tomllib.load(f)
1176
+ pl_section = prepared_config.get("prompt_learning", {})
1177
+ termination_section = pl_section.get("termination_config", {})
1178
+ extracted_max_rollouts = termination_section.get("max_rollouts") if isinstance(termination_section, dict) else None
1179
+ if extracted_max_rollouts is not None:
1180
+ assert extracted_max_rollouts == max_rollouts_override, (
1181
+ f"CRITICAL: Max rollouts mismatch for job {job.job_id}: "
1182
+ f"override={max_rollouts_override} but extracted={extracted_max_rollouts}. "
1183
+ f"Config path: {prepared.path}"
1184
+ )
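The override checks above re-open the prepared TOML file once per key; a small helper along these lines (illustrative only, not part of the module, and assuming Python 3.11+ for tomllib) would cover all three dotted paths with one read:

    import tomllib
    from pathlib import Path

    def read_dotted_key(config_path: Path, dotted_key: str):
        """Return the value at e.g. 'prompt_learning.gepa.rollout.budget', or None."""
        with open(config_path, "rb") as fh:
            node = tomllib.load(fh)
        for part in dotted_key.split("."):
            if not isinstance(node, dict):
                return None
            node = node.get(part)
        return node

Each override could then be verified with a single comparison such as read_dotted_key(prepared.path, key) == expected.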
1185
+
1186
+ if final_model or environment:
1187
+ # Build policy string with provider if available
1188
+ policy_str = f"{final_provider}/{final_model}" if final_provider and final_model else final_model
1189
+ status_tracker.update(policy=policy_str, environment=environment)
1190
+ logger.info(
1191
+ "📊 Experiment config for job %s: policy=%s, environment=%s",
1192
+ job.job_id,
1193
+ policy or "unknown",
1194
+ environment or "unknown",
1195
+ )
1196
+
1197
+ cmd = _build_train_command(str(prepared.path))
1198
+ assert isinstance(cmd, list), (
1199
+ f"_build_train_command must return list, got {type(cmd).__name__}"
1200
+ )
1201
+ # Store cmd for execution logging (needed at end of function)
1202
+ assert len(cmd) > 0, "Command list cannot be empty"
1203
+ assert all(isinstance(arg, str) for arg in cmd), (
1204
+ f"All command arguments must be str, got types: {[type(arg).__name__ for arg in cmd]}"
1205
+ )
1206
+ logger.info("Executing job %s via command: %s", job.job_id, " ".join(cmd))
1207
+
1208
+ # Run command with unbuffered output to see errors immediately
1209
+ env = os.environ.copy()
1210
+ assert isinstance(env, dict), (
1211
+ f"os.environ.copy() must return dict, got {type(env).__name__}"
1212
+ )
1213
+ env["PYTHONUNBUFFERED"] = "1"
1214
+
1215
+ # Log authentication status BEFORE running command
1216
+ synth_key = env.get("SYNTH_API_KEY")
1217
+ env_key = env.get("ENVIRONMENT_API_KEY")
1218
+ logger.info(
1219
+ "🔐 Authentication status for job %s:\n"
1220
+ " SYNTH_API_KEY: %s\n"
1221
+ " ENVIRONMENT_API_KEY: %s",
1222
+ job.job_id,
1223
+ f"{synth_key[:8]}...{synth_key[-4:]}" if synth_key and len(synth_key) > 12 else "(NOT SET)",
1224
+ f"{env_key[:8]}...{env_key[-4:]}" if env_key and len(env_key) > 12 else "(NOT SET)",
1225
+ )
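A tiny helper (hypothetical, mirroring the inline expressions above) makes the masking rule explicit: keys of 12 characters or fewer are never partially revealed.

    def mask_secret(value: str | None) -> str:
        if not value:
            return "(NOT SET)"
        if len(value) <= 12:
            return "(SET, too short to mask)"
        return f"{value[:8]}...{value[-4:]}"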
1226
+
1227
+ logger.info(
1228
+ "🚀 Starting subprocess for job %s:\n"
1229
+ " Command: %s\n"
1230
+ " Working directory: %s\n"
1231
+ " Python executable: %s\n"
1232
+ " Environment keys: %s",
1233
+ job.job_id,
1234
+ " ".join(cmd),
1235
+ os.getcwd(),
1236
+ env.get("PYTHON", "python"),
1237
+ ", ".join(sorted([k for k in env if "API" in k or "KEY" in k])),
1238
+ )
1239
+
1240
+ # Get backend URL and API key for progress polling
1241
+ config = load_config()
1242
+ assert config is not None, "load_config() returned None"
1243
+ backend_url = config.backend_url
1244
+ assert isinstance(backend_url, str), (
1245
+ f"config.backend_url must be str, got {type(backend_url).__name__}"
1246
+ )
1247
+ assert backend_url.startswith(("http://", "https://")), (
1248
+ f"backend_url must start with http:// or https://, got {backend_url}"
1249
+ )
1250
+
1251
+ # Get API key from .env file - fail loudly if not found
1252
+ # This is needed for the poller thread, which runs in the worker process
1253
+ try:
1254
+ api_key = _load_synth_api_key()
1255
+ except RuntimeError as e:
1256
+ logger.error(str(e))
1257
+ raise
1258
+
1259
+ # Start background progress poller (will be started once we have backend_job_id)
1260
+ poller_stop = threading.Event()
1261
+ assert poller_stop is not None, "threading.Event() returned None"
1262
+ poller_thread: threading.Thread | None = None
1263
+ backend_job_id: str | None = None
1264
+
1265
+ try:
1266
+ # Stream subprocess output line-by-line to extract backend_job_id and parse progress
1267
+ process = subprocess.Popen(
1268
+ cmd,
1269
+ stdout=subprocess.PIPE,
1270
+ stderr=subprocess.STDOUT,
1271
+ text=True,
1272
+ env=env,
1273
+ bufsize=1, # Line buffered
1274
+ )
1275
+ assert process is not None, "subprocess.Popen() returned None"
1276
+ assert process.stdout is not None, "process.stdout is None"
1277
+
1278
+ stdout_lines: list[str] = []
1279
+ accumulated_output = "" # Accumulate output for better pattern matching
1280
+ last_status_update_time = job_start_time
1281
+ status_update_interval = 5.0 # Update status_json every 5 seconds even without progress
1282
+ assert status_update_interval > 0, (
1283
+ f"status_update_interval must be > 0, got {status_update_interval}"
1284
+ )
1285
+
1286
+ # Read output line-by-line with timeout protection
1287
+ # If subprocess crashes immediately, we need to ensure we capture the error
1288
+ try:
1289
+ # Read output line-by-line
1290
+ for line in process.stdout:
1291
+ assert isinstance(line, str), (
1292
+ f"process.stdout line must be str, got {type(line).__name__}"
1293
+ )
1294
+ stdout_lines.append(line)
1295
+ assert isinstance(accumulated_output, str), (
1296
+ f"accumulated_output must be str, got {type(accumulated_output).__name__}"
1297
+ )
1298
+ accumulated_output += line
1299
+ assert len(accumulated_output) >= len(line), (
1300
+ f"accumulated_output length should increase, got {len(accumulated_output)} < {len(line)}"
1301
+ )
1302
+
1303
+ # Try to extract backend_job_id from output
1304
+ if not backend_job_id:
1305
+ extracted_id = _extract_backend_job_id(line)
1306
+ if extracted_id:
1307
+ # Assert extracted ID is valid before using it
1308
+ assert extracted_id.startswith("pl_"), (
1309
+ f"Invalid backend_job_id format: {extracted_id}"
1310
+ )
1311
+ assert len(extracted_id) > 3, (
1312
+ f"Backend job ID too short: {extracted_id}"
1313
+ )
1314
+
1315
+ backend_job_id = extracted_id
1316
+ logger.info("📋 Extracted backend job ID: %s", backend_job_id)
1317
+
1318
+ # Store backend_job_id in status_json for debugging
1319
+ status_tracker.update(custom_fields={"backend_job_id": backend_job_id})
1320
+ logger.info("📋 Stored backend_job_id in status_json for job %s", job.job_id)
1321
+
1322
+ # Update job with backend_job_id
1323
+ with session_scope() as session:
1324
+ db_job = session.get(ExperimentJob, job.job_id)
1325
+ if db_job:
1326
+ db_job.backend_job_id = backend_job_id
1327
+ session.commit()
1328
+
1329
+ # Start progress poller now that we have backend_job_id
1330
+ # API key should already be loaded and validated above
1331
+ if not api_key:
1332
+ raise RuntimeError(
1333
+ f"❌ SYNTH_API_KEY not available for job {job.job_id}. "
1334
+ "This should have been caught earlier - API key loading failed."
1335
+ )
1336
+ elif not backend_url:
1337
+ logger.warning(
1338
+ "⚠️ Cannot start progress poller for job %s: backend_url not configured. "
1339
+ "Progress updates will not be available, but job will continue.",
1340
+ job.job_id,
1341
+ )
1342
+ elif backend_job_id and not backend_job_id.startswith("pl_"):
1343
+ logger.warning(
1344
+ "⚠️ Cannot start progress poller for job %s: invalid backend_job_id format: %s. "
1345
+ "Progress updates will not be available, but job will continue.",
1346
+ job.job_id,
1347
+ backend_job_id,
1348
+ )
1349
+
1350
+ if api_key and backend_url and backend_job_id and backend_job_id.startswith("pl_"):
1351
+ # Validate all inputs before starting thread
1352
+ assert isinstance(backend_job_id, str), (
1353
+ f"backend_job_id must be str, got {type(backend_job_id).__name__}"
1354
+ )
1355
+ assert isinstance(status_tracker, ExperimentStatusTracker), (
1356
+ f"status_tracker must be ExperimentStatusTracker, got {type(status_tracker).__name__}"
1357
+ )
1358
+ assert isinstance(backend_url, str), (
1359
+ f"backend_url must be str, got {type(backend_url).__name__}"
1360
+ )
1361
+ assert isinstance(api_key, str), (
1362
+ f"api_key must be str, got {type(api_key).__name__}"
1363
+ )
1364
+ assert poller_stop is not None, "poller_stop cannot be None"
1365
+
1366
+ poller_thread = threading.Thread(
1367
+ target=_poll_backend_progress,
1368
+ args=(
1369
+ backend_job_id,
1370
+ status_tracker,
1371
+ policy,
1372
+ environment,
1373
+ backend_url,
1374
+ api_key,
1375
+ poller_stop,
1376
+ job_start_time, # Pass job start time for rollouts/min calculation
1377
+ ),
1378
+ daemon=True,
1379
+ )
1380
+ assert poller_thread is not None, "threading.Thread() returned None"
1381
+ poller_thread.start()
1382
+ assert poller_thread.daemon, (
1383
+ "Poller thread must be a daemon so it cannot block worker shutdown"
1384
+ )
1385
+ logger.info("📡 Started progress poller for backend job %s", backend_job_id)
1386
+ else:
1387
+ logger.warning(
1388
+ "Cannot start progress poller: missing API key or backend URL"
1389
+ )
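_poll_backend_progress is defined elsewhere in this module; based on how it is wired up here (stop event, backend URL, API key, status tracker) and on the fallback fetch later in this function, a poller of roughly the following shape is implied. The body is an assumption, not the actual implementation.

    import threading
    import requests

    def poll_backend_progress_sketch(backend_job_id, status_tracker, backend_url,
                                     api_key, stop_event: threading.Event, interval=5.0):
        url = f"{backend_url.rstrip('/')}/prompt-learning/online/jobs/{backend_job_id}"
        headers = {"Authorization": f"Bearer {api_key}"}
        while not stop_event.wait(interval):      # returns True once the main thread calls .set()
            try:
                resp = requests.get(url, headers=headers, timeout=10.0)
            except requests.RequestException:
                continue                          # transient network error: keep polling
            if resp.status_code == 200:
                stats = resp.json().get("metadata", {}).get("stats", {})
                status_tracker.update(custom_fields={"backend_stats": stats})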
1390
+
1391
+ # Parse accumulated output for progress updates (fallback if API polling fails)
1392
+ # Use accumulated output (not just current line) for better pattern matching
1393
+ # Update status_json periodically even without progress data to show elapsed time
1394
+ current_time = time.time()
1395
+ assert current_time >= job_start_time, (
1396
+ f"current_time ({current_time}) < job_start_time ({job_start_time})"
1397
+ )
1398
+ assert isinstance(accumulated_output, str), (
1399
+ f"accumulated_output must be str, got {type(accumulated_output).__name__}"
1400
+ )
1401
+
1402
+ should_update = (
1403
+ # Update if we find progress patterns
1404
+ "rollouts=" in line.lower() or
1405
+ "progress:" in line.lower() or
1406
+ "gepa progress:" in line.lower() or
1407
+ # Or update periodically (every 5 seconds) to show elapsed time
1408
+ (current_time - last_status_update_time) >= status_update_interval
1409
+ )
1410
+ assert isinstance(should_update, bool), (
1411
+ f"should_update must be bool, got {type(should_update).__name__}"
1412
+ )
1413
+
1414
+ if should_update:
1415
+ # Validate accumulated_output before parsing
1416
+ assert len(accumulated_output) > 0, "accumulated_output cannot be empty"
1417
+ output_to_parse = accumulated_output[-5000:] # Last 5KB to avoid parsing huge outputs
1418
+ assert isinstance(output_to_parse, str), (
1419
+ f"output_to_parse must be str, got {type(output_to_parse).__name__}"
1420
+ )
1421
+ assert len(output_to_parse) <= len(accumulated_output), (
1422
+ f"output_to_parse length ({len(output_to_parse)}) > accumulated_output length ({len(accumulated_output)})"
1423
+ )
1424
+
1425
+ update_status_from_output(
1426
+ status_tracker,
1427
+ output_to_parse,
1428
+ policy=policy,
1429
+ environment=environment,
1430
+ start_time=job_start_time,
1431
+ )
1432
+ last_status_update_time = current_time
1433
+ assert last_status_update_time >= job_start_time, (
1434
+ f"last_status_update_time ({last_status_update_time}) < job_start_time ({job_start_time})"
1435
+ )
1436
+ except (BrokenPipeError, OSError) as e:
1437
+ # Subprocess may have crashed - log and continue to wait() to get returncode
1438
+ logger.warning(
1439
+ "Error reading subprocess stdout for job %s (process may have crashed): %s",
1440
+ job.job_id,
1441
+ e,
1442
+ )
1443
+ # Continue to process.wait() to get the returncode and any buffered output
1444
+
1445
+ # Wait for process to complete (ALWAYS wait, even if stdout reading failed)
1446
+ assert process is not None, "process is None before wait()"
1447
+ returncode = process.wait()
1448
+
1449
+ # If stdout reading failed but process exited, try to read any remaining buffered output
1450
+ if process.stdout and not stdout_lines:
1451
+ try:
1452
+ remaining_output = process.stdout.read()
1453
+ if remaining_output:
1454
+ stdout_lines.append(remaining_output)
1455
+ accumulated_output += remaining_output
1456
+ logger.info(
1457
+ "Captured remaining subprocess output for job %s after process exit: %d bytes",
1458
+ job.job_id,
1459
+ len(remaining_output),
1460
+ )
1461
+ except Exception as e:
1462
+ logger.warning(
1463
+ "Failed to read remaining subprocess output for job %s: %s",
1464
+ job.job_id,
1465
+ e,
1466
+ )
1467
+ assert isinstance(returncode, int), (
1468
+ f"process.wait() must return int, got {type(returncode).__name__}: {returncode}"
1469
+ )
1470
+
1471
+ # Combine output
1472
+ assert isinstance(stdout_lines, list), (
1473
+ f"stdout_lines must be list, got {type(stdout_lines).__name__}"
1474
+ )
1475
+ assert all(isinstance(line, str) for line in stdout_lines), (
1476
+ f"All stdout_lines must be str, got types: {[type(line).__name__ for line in stdout_lines[:5]]}"
1477
+ )
1478
+
1479
+ stdout = "".join(stdout_lines)
1480
+ assert isinstance(stdout, str), (
1481
+ f"stdout must be str, got {type(stdout).__name__}"
1482
+ )
1483
+ stderr = "" # stderr is redirected to stdout
1484
+ assert isinstance(stderr, str), (
1485
+ f"stderr must be str, got {type(stderr).__name__}"
1486
+ )
1487
+
1488
+ # CRITICAL: If subprocess failed but we have no output, log a warning
1489
+ # This indicates the subprocess crashed before producing any output
1490
+ if returncode != 0 and not stdout:
1491
+ logger.error(
1492
+ "❌ Subprocess for job %s exited with code %d but produced NO output. "
1493
+ "This usually indicates an immediate crash (import error, syntax error, etc.). "
1494
+ "Command: %s",
1495
+ job.job_id,
1496
+ returncode,
1497
+ " ".join(cmd),
1498
+ )
1499
+ # Set a helpful error message
1500
+ stdout = (
1501
+ f"[ERROR] Subprocess crashed immediately with exit code {returncode}. "
1502
+ f"No output captured. This usually indicates:\n"
1503
+ f" 1. Import error (missing module)\n"
1504
+ f" 2. Syntax error in Python code\n"
1505
+ f" 3. Missing executable or PATH issue\n"
1506
+ f" 4. Permission error\n"
1507
+ f"\nCommand: {' '.join(cmd)}\n"
1508
+ f"Working directory: {os.getcwd()}\n"
1509
+ f"Python: {env.get('PYTHON', 'python')}"
1510
+ )
1511
+
1512
+ # Create CompletedProcess-like object for compatibility
1513
+ class CompletedProcess:
1514
+ def __init__(self, returncode: int, stdout: str, stderr: str):
1515
+ assert isinstance(returncode, int), (
1516
+ f"returncode must be int, got {type(returncode).__name__}"
1517
+ )
1518
+ assert isinstance(stdout, str), (
1519
+ f"stdout must be str, got {type(stdout).__name__}"
1520
+ )
1521
+ assert isinstance(stderr, str), (
1522
+ f"stderr must be str, got {type(stderr).__name__}"
1523
+ )
1524
+ self.returncode = returncode
1525
+ self.stdout = stdout
1526
+ self.stderr = stderr
1527
+
1528
+ completed = CompletedProcess(returncode, stdout, stderr)
1529
+ assert isinstance(completed, CompletedProcess), (
1530
+ f"CompletedProcess() must return CompletedProcess, got {type(completed).__name__}"
1531
+ )
1532
+
1533
+ logger.info(
1534
+ "✅ Subprocess completed for job %s:\n"
1535
+ " Return code: %s\n"
1536
+ " Stdout length: %d chars\n"
1537
+ " Stderr length: %d chars",
1538
+ job.job_id,
1539
+ completed.returncode,
1540
+ len(completed.stdout) if completed.stdout else 0,
1541
+ len(completed.stderr) if completed.stderr else 0,
1542
+ )
1543
+
1544
+ # Final status update from complete output
1545
+ assert isinstance(completed.stdout, str), (
1546
+ f"completed.stdout must be str before final update, got {type(completed.stdout).__name__}"
1547
+ )
1548
+ assert len(completed.stdout) > 0 or len(accumulated_output) > 0, (
1549
+ "Must have some output for final status update"
1550
+ )
1551
+
1552
+ # Use accumulated_output if available (more complete), otherwise stdout
1553
+ final_output = accumulated_output if accumulated_output else completed.stdout
1554
+ assert isinstance(final_output, str), (
1555
+ f"final_output must be str, got {type(final_output).__name__}"
1556
+ )
1557
+
1558
+ update_status_from_output(
1559
+ status_tracker,
1560
+ final_output,
1561
+ policy=policy,
1562
+ environment=environment,
1563
+ start_time=job_start_time,
1564
+ )
1565
+ except subprocess.TimeoutExpired as e:
1566
+ logger.error("⏱️ Subprocess TIMEOUT for job %s after %s seconds", job.job_id, e.timeout)
1567
+ raise
1568
+ except Exception as e:
1569
+ logger.error(
1570
+ "❌ Subprocess EXCEPTION for job %s:\n"
1571
+ " Type: %s\n"
1572
+ " Message: %s",
1573
+ job.job_id,
1574
+ type(e).__name__,
1575
+ str(e),
1576
+ exc_info=True,
1577
+ )
1578
+ raise
1579
+ finally:
1580
+ # Stop progress poller
1581
+ if poller_thread and poller_thread.is_alive():
1582
+ poller_stop.set()
1583
+ poller_thread.join(timeout=5)
1584
+ logger.info("📡 Stopped progress poller for job %s", job.job_id)
1585
+
1586
+ # Log full output for debugging - prioritize auth errors
1587
+ logger.info("Training command returncode: %s", completed.returncode)
1588
+
1589
+ # Check for critical errors FIRST - these should cause failure even if returncode is 0
1590
+ stdout_lower = (completed.stdout or "").lower()
1591
+ stderr_lower = (completed.stderr or "").lower()
1592
+ combined_output = (completed.stdout or "") + "\n" + (completed.stderr or "")
1593
+ combined_lower = combined_output.lower()
1594
+
1595
+ # Check for health check failures (common cause of silent failures)
1596
+ health_check_failures = []
1597
+ health_check_details = []
1598
+ if "health check failed" in combined_lower or "aborting due to failing health check" in combined_lower:
1599
+ # Extract full context around health check failure - look for error patterns
1600
+ for source_name, source_text in [("STDOUT", completed.stdout), ("STDERR", completed.stderr)]:
1601
+ if not source_text:
1602
+ continue
1603
+ source_lower = source_text.lower()
1604
+ if "health check" in source_lower:
1605
+ # Find health check failure message
1606
+ idx = source_lower.find("health check")
1607
+ start = max(0, idx - 200)
1608
+ end = min(len(source_text), idx + 500)
1609
+ health_check_failures.append(f"{source_name} (health check context):\n{source_text[start:end]}")
1610
+
1611
+ # Also look for error patterns that might explain WHY it failed
1612
+ # Look for HTTP status codes, error messages, exceptions
1613
+ if "500" in source_text or "internal server error" in source_lower:
1614
+ # Find the 500 error context
1615
+ error_idx = source_lower.find("500") if "500" in source_text else source_lower.find("internal server error")
1616
+ if error_idx >= 0:
1617
+ error_start = max(0, error_idx - 100)
1618
+ error_end = min(len(source_text), error_idx + 800)
1619
+ health_check_details.append(f"{source_name} (500 error details):\n{source_text[error_start:error_end]}")
1620
+
1621
+ # Look for tracebacks or exception messages
1622
+ if "traceback" in source_lower or "exception" in source_lower or "error:" in source_lower:
1623
+ # Find traceback/exception
1624
+ tb_idx = source_lower.find("traceback") if "traceback" in source_lower else (
1625
+ source_lower.find("exception") if "exception" in source_lower else source_lower.find("error:")
1626
+ )
1627
+ if tb_idx >= 0:
1628
+ tb_start = max(0, tb_idx - 50)
1629
+ tb_end = min(len(source_text), tb_idx + 1500) # Get more context for tracebacks
1630
+ health_check_details.append(f"{source_name} (exception/traceback):\n{source_text[tb_start:tb_end]}")
1631
+
1632
+ # Look for specific error messages like "ModuleNotFoundError", "RuntimeError", etc.
1633
+ error_patterns = [
1634
+ r"(ModuleNotFoundError|ImportError|RuntimeError|ValueError|KeyError|AttributeError)[^\n]*",
1635
+ r"Failed to [^\n]+",
1636
+ r"Unable to [^\n]+",
1637
+ r"Missing [^\n]+",
1638
+ ]
1639
+ for pattern in error_patterns:
1640
+ matches = re.finditer(pattern, source_text, re.IGNORECASE | re.MULTILINE)
1641
+ for match in matches:
1642
+ match_start = max(0, match.start() - 100)
1643
+ match_end = min(len(source_text), match.end() + 300)
1644
+ health_check_details.append(f"{source_name} (error pattern '{pattern[:30]}...'):\n{source_text[match_start:match_end]}")
1645
+
1646
+ if health_check_failures:
1647
+ success = False
1648
+ # Build informative error message
1649
+ error_parts = [
1650
+ "Training command failed health check. Task app endpoint returned error.",
1651
+ ]
1652
+ if health_check_details:
1653
+ error_parts.append("See details below for root cause.")
1654
+ else:
1655
+ error_parts.append("Check task app logs and ensure /task_info endpoint is working.")
1656
+
1657
+ error_message = " ".join(error_parts)
1658
+
1659
+ logger.error(
1660
+ "🚨 HEALTH CHECK FAILURE for job %s:\n%s",
1661
+ job.job_id,
1662
+ "\n".join(health_check_failures),
1663
+ )
1664
+
1665
+ if health_check_details:
1666
+ logger.error(
1667
+ "🔍 ROOT CAUSE ANALYSIS for job %s:\n%s",
1668
+ job.job_id,
1669
+ "\n" + "="*80 + "\n".join(health_check_details) + "\n" + "="*80,
1670
+ )
+
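Both the health-check scan above and the authentication-keyword scan that follows rely on the same slice-around-keyword idea; factored out, it would look roughly like this (illustrative helper, not present in the module):

    def context_around(text: str, keyword: str, before: int = 200, after: int = 500) -> str | None:
        """Return a window of text surrounding the first case-insensitive keyword hit."""
        idx = text.lower().find(keyword.lower())
        if idx < 0:
            return None
        return text[max(0, idx - before): min(len(text), idx + after)]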
1671
+
1672
+ # Check for authentication-related errors
1673
+ auth_keywords = [
1674
+ "authentication",
1675
+ "authorization",
1676
+ "api key",
1677
+ "api_key",
1678
+ "missing api",
1679
+ "invalid api",
1680
+ "unauthorized",
1681
+ "forbidden",
1682
+ "401",
1683
+ "403",
1684
+ "missing",
1685
+ "not set",
1686
+ "required",
1687
+ ]
1688
+
1689
+ auth_errors = []
1690
+ for keyword in auth_keywords:
1691
+ if keyword in stdout_lower:
1692
+ # Extract context around the keyword
1693
+ idx = stdout_lower.find(keyword)
1694
+ start = max(0, idx - 100)
1695
+ end = min(len(completed.stdout), idx + 200)
1696
+ auth_errors.append(f"STDOUT: ...{completed.stdout[start:end]}...")
1697
+ if keyword in stderr_lower:
1698
+ idx = stderr_lower.find(keyword)
1699
+ start = max(0, idx - 100)
1700
+ end = min(len(completed.stderr), idx + 200)
1701
+ auth_errors.append(f"STDERR: ...{completed.stderr[start:end]}...")
1702
+
1703
+ if auth_errors:
1704
+ logger.error(
1705
+ "🚨 AUTHENTICATION ERRORS DETECTED for job %s:\n%s",
1706
+ job.job_id,
1707
+ "\n".join(auth_errors),
1708
+ )
1709
+
1710
+ # Log full output (especially important for errors)
1711
+ if completed.stdout:
1712
+ if not success:
1713
+ # For errors, log full output
1714
+ logger.error("Training command stdout (FULL, %d chars):\n%s", len(completed.stdout), completed.stdout)
1715
+ else:
1716
+ # For success, log last 2000 chars
1717
+ logger.info("Training command stdout (last 2000 chars):\n%s", completed.stdout[-2000:])
1718
+ else:
1719
+ logger.warning("Training command stdout is EMPTY - command may have exited before producing output")
1720
+
1721
+ if completed.stderr:
1722
+ if not success:
1723
+ # For errors, log full output
1724
+ logger.error("Training command stderr (FULL, %d chars):\n%s", len(completed.stderr), completed.stderr)
1725
+ else:
1726
+ # For success, log last 2000 chars
1727
+ logger.warning("Training command stderr (last 2000 chars):\n%s", completed.stderr[-2000:])
1728
+ else:
1729
+ logger.info("Training command stderr is empty")
1730
+ # Validate inputs before collecting results
1731
+ assert prepared is not None, "prepared cannot be None"
1732
+ assert isinstance(prepared, PreparedConfig), (
1733
+ f"prepared must be PreparedConfig, got {type(prepared).__name__}"
1734
+ )
1735
+ assert isinstance(prepared.results_folder, Path), (
1736
+ f"prepared.results_folder must be Path, got {type(prepared.results_folder).__name__}"
1737
+ )
1738
+ assert isinstance(completed.stdout, str), (
1739
+ f"completed.stdout must be str, got {type(completed.stdout).__name__}"
1740
+ )
1741
+ assert isinstance(completed.stderr, str), (
1742
+ f"completed.stderr must be str, got {type(completed.stderr).__name__}"
1743
+ )
1744
+
1745
+ artifact_summary = collect_result_summary(
1746
+ prepared.results_folder,
1747
+ stdout=completed.stdout,
1748
+ stderr=completed.stderr,
1749
+ )
1750
+ assert isinstance(artifact_summary, ResultSummary), (
1751
+ f"collect_result_summary must return ResultSummary, got {type(artifact_summary).__name__}"
1752
+ )
1753
+
1754
+ artifact_summary.stdout = _truncate(completed.stdout)
1755
+ assert isinstance(artifact_summary.stdout, str), (
1756
+ f"artifact_summary.stdout must be str after truncate, got {type(artifact_summary.stdout).__name__}"
1757
+ )
1758
+ artifact_summary.stderr = _truncate(completed.stderr)
1759
+ assert isinstance(artifact_summary.stderr, str), (
1760
+ f"artifact_summary.stderr must be str after truncate, got {type(artifact_summary.stderr).__name__}"
1761
+ )
1762
+ artifact_summary.returncode = completed.returncode
1763
+ assert isinstance(artifact_summary.returncode, int), (
1764
+ f"artifact_summary.returncode must be int, got {type(artifact_summary.returncode).__name__}"
1765
+ )
1766
+ summary = artifact_summary
1767
+ assert isinstance(summary, ResultSummary), (
1768
+ f"summary must be ResultSummary, got {type(summary).__name__}"
1769
+ )
1770
+
1771
+ # If summary.total_rollouts is None, try to fetch it from backend metadata stats
1772
+ # This handles cases where CLI output parsing fails but backend has accurate stats
1773
+ if summary.total_rollouts is None and backend_job_id:
1774
+ try:
1775
+ import requests
1776
+
1777
+ config = load_config()
1778
+ backend_url = config.backend_url
1779
+ try:
1780
+ api_key = _load_synth_api_key()
1781
+ except RuntimeError:
1782
+ api_key = None
1783
+
1784
+ if backend_url and api_key:
1785
+ url = f"{backend_url.rstrip('/')}/prompt-learning/online/jobs/{backend_job_id}"
1786
+ headers = {"Authorization": f"Bearer {api_key}"}
1787
+ resp = requests.get(url, headers=headers, timeout=10.0)
1788
+
1789
+ if resp.status_code == 200:
1790
+ backend_job = resp.json()
1791
+ backend_metadata = backend_job.get("metadata", {})
1792
+ backend_stats = backend_metadata.get("stats", {})
1793
+
1794
+ # Try to get total_rollouts from backend stats
1795
+ # Prefer total_rollouts, fallback to sum of optimization + validation rollouts
1796
+ backend_total_rollouts = backend_stats.get("total_rollouts")
1797
+ if backend_total_rollouts is None:
1798
+ opt_rollouts = backend_stats.get("optimization_rollouts_executed", 0) or 0
1799
+ val_rollouts = backend_stats.get("validation_rollouts_executed", 0) or 0
1800
+ if opt_rollouts > 0 or val_rollouts > 0:
1801
+ backend_total_rollouts = opt_rollouts + val_rollouts
1802
+
1803
+ if backend_total_rollouts is not None and backend_total_rollouts > 0:
1804
+ summary.total_rollouts = backend_total_rollouts
1805
+ logger.info(
1806
+ "✅ Extracted total_rollouts=%d from backend metadata stats for job %s (backend_job_id=%s)",
1807
+ backend_total_rollouts,
1808
+ job.job_id,
1809
+ backend_job_id,
1810
+ )
1811
+ except Exception as e:
1812
+ # Log but don't fail - backend fetch is best-effort fallback
1813
+ logger.debug(
1814
+ "Could not fetch backend stats to extract rollouts for job %s: %s",
1815
+ job.job_id,
1816
+ e,
1817
+ )
1818
+
1819
+ # Check if training actually ran - for prompt learning (GEPA/MIPRO), we expect results
1820
+ # Note: success may have been set to False above if health check failed
1821
+ if not error_message: # Only check returncode if we haven't already detected a failure
1822
+ success = completed.returncode == 0
1823
+ if success and job.job_type == "gepa":
1824
+ # GEPA should produce rollouts - that's the primary indicator of success
1825
+ # If returncode is 0 but no rollouts were produced, it failed silently
1826
+ if summary.total_rollouts is None or summary.total_rollouts == 0:
1827
+ success = False
1828
+ error_message = (
1829
+ "Training command exited with returncode 0 but produced no rollouts. "
1830
+ "This indicates GEPA did not actually run. "
1831
+ f"Check stdout/stderr for errors. "
1832
+ f"Results folder: {prepared.results_folder}"
1833
+ )
1834
+ logger.error(
1835
+ "Job %s failed silently: %s\nStdout tail:\n%s\nStderr tail:\n%s",
1836
+ job.job_id,
1837
+ error_message,
1838
+ summary.stdout[-1000:] if summary.stdout else "(empty)",
1839
+ summary.stderr[-1000:] if summary.stderr else "(empty)",
1840
+ )
1841
+ else:
1842
+ # We have rollouts - that's sufficient evidence GEPA ran successfully
1843
+ # Learning curve and stats are nice-to-have but not required
1844
+ logger.info(
1845
+ "Job %s completed successfully with %d rollouts (best_score=%s, learning_curve_points=%d, stats=%s)",
1846
+ job.job_id,
1847
+ summary.total_rollouts,
1848
+ summary.best_score,
1849
+ len(summary.learning_curve_points),
1850
+ "yes" if summary.stats else "no",
1851
+ )
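Restated compactly, the success rule applied to GEPA jobs above is the following (illustrative restatement only, not a function that exists in the module):

    def gepa_job_succeeded(returncode: int, total_rollouts: int | None) -> bool:
        # A zero exit code with no recorded rollouts is treated as a silent failure.
        return returncode == 0 and (total_rollouts or 0) > 0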
1852
+
1853
+ if not success and not error_message:
1854
+ # Build detailed error message with FULL stdout/stderr
1855
+ error_parts = [f"Training command exited with {completed.returncode}"]
1856
+
1857
+ # Include FULL stdout if available (for errors, we want complete context)
1858
+ if completed.stdout:
1859
+ error_parts.append(f"\n\n{'='*80}\nSTDOUT (FULL, {len(completed.stdout)} chars):\n{'='*80}\n{completed.stdout}")
1860
+ else:
1861
+ error_parts.append("\n\nStdout: (empty - subprocess may have crashed immediately)")
1862
+
1863
+ # Include FULL stderr if available
1864
+ if completed.stderr:
1865
+ error_parts.append(f"\n\n{'='*80}\nSTDERR (FULL, {len(completed.stderr)} chars):\n{'='*80}\n{completed.stderr}")
1866
+ else:
1867
+ error_parts.append("\n\nStderr: (empty)")
1868
+
1869
+ error_message = "".join(error_parts)
1870
+
1871
+ # Log full error (truncate only for logger, but keep full in error_message)
1872
+ logger.error(
1873
+ "Job %s failed: %s\nFull stdout (%d chars):\n%s\nFull stderr (%d chars):\n%s",
1874
+ job.job_id,
1875
+ f"Training command exited with {completed.returncode}",
1876
+ len(completed.stdout) if completed.stdout else 0,
1877
+ completed.stdout if completed.stdout else "(empty)",
1878
+ len(completed.stderr) if completed.stderr else 0,
1879
+ completed.stderr if completed.stderr else "(empty)",
1880
+ )
1881
+ except Exception as exc:
1882
+ error_message = str(exc)
1883
+ summary.stderr = _truncate((summary.stderr or "") + f"\n{error_message}")
1884
+ logger.exception("Job %s encountered error: %s", job.job_id, error_message)
1885
+ finally:
1886
+ if prepared:
1887
+ prepared.cleanup()
1888
+
1889
+ # Prepare execution details for logging
1890
+ command_str = " ".join(cmd) if cmd is not None and len(cmd) > 0 else None
1891
+ working_dir = os.getcwd()
1892
+ if env is not None:
1893
+ python_exe = env.get("PYTHON", "python")
1894
+ env_keys = list(env.keys())
1895
+ else:
1896
+ python_exe = None
1897
+ env_keys = None
1898
+
1899
+ return _finalize_job(
1900
+ job.job_id,
1901
+ summary=summary,
1902
+ success=success,
1903
+ error_message=error_message,
1904
+ command=command_str,
1905
+ working_directory=working_dir,
1906
+ python_executable=python_exe,
1907
+ environment_keys=env_keys,
1908
+ )
1909
+
1910
+
1911
+ @celery_app.task(name="synth_ai.cli.local.experiment_queue.process_experiment_queue")
1912
+ def process_experiment_queue() -> dict[str, Any]:
1913
+ """Periodic task that checks for queued jobs and dispatches them.
1914
+
1915
+ This task runs every 5 seconds (via Celery Beat) to ensure queued jobs
1916
+ are dispatched even if:
1917
+ - Previous dispatch attempts failed
1918
+ - Jobs were queued while other jobs were running
1919
+ - Worker restarted and missed dispatch events
1920
+
1921
+ Returns a summary of dispatched jobs.
1922
+ """
1923
+ # Verify we're using the correct database
1924
+ from .config import load_config
1925
+ config = load_config()
1926
+ env_db_path = os.getenv("EXPERIMENT_QUEUE_DB_PATH")
1927
+ if env_db_path:
1928
+ from pathlib import Path
1929
+ env_db_path_resolved = Path(env_db_path).expanduser().resolve()
1930
+ if config.sqlite_path != env_db_path_resolved:
1931
+ logger.error(
1932
+ "Database path mismatch in periodic task! ENV: %s != CONFIG: %s",
1933
+ env_db_path_resolved,
1934
+ config.sqlite_path,
1935
+ )
1936
+
1937
+ logger.debug("Processing experiment queue for queued jobs (database: %s)", config.sqlite_path)
1938
+ dispatched_count = 0
1939
+ experiments_checked = 0
1940
+
1941
+ with session_scope() as session:
1942
+ # Find all running or queued experiments that might have jobs to dispatch
1943
+ active_experiments = (
1944
+ session.query(Experiment)
1945
+ .filter(
1946
+ Experiment.status.in_([ExperimentStatus.QUEUED, ExperimentStatus.RUNNING])
1947
+ )
1948
+ .all()
1949
+ )
1950
+
1951
+ for experiment in active_experiments:
1952
+ experiments_checked += 1
1953
+ # Check if there are any queued jobs without celery_task_id
1954
+ queued_jobs = (
1955
+ session.query(ExperimentJob)
1956
+ .filter(
1957
+ ExperimentJob.experiment_id == experiment.experiment_id,
1958
+ ExperimentJob.status == ExperimentJobStatus.QUEUED,
1959
+ ExperimentJob.celery_task_id.is_(None),
1960
+ )
1961
+ .count()
1962
+ )
1963
+
1964
+ if queued_jobs > 0:
1965
+ logger.debug(
1966
+ "Found %d queued jobs for experiment %s, attempting dispatch",
1967
+ queued_jobs,
1968
+ experiment.experiment_id,
1969
+ )
1970
+ dispatched = dispatch_available_jobs(session, experiment.experiment_id)
1971
+ dispatched_count += len(dispatched)
1972
+ if dispatched:
1973
+ logger.info(
1974
+ "Dispatched %d jobs for experiment %s",
1975
+ len(dispatched),
1976
+ experiment.experiment_id,
1977
+ )
1978
+
1979
+ result = {
1980
+ "dispatched": dispatched_count,
1981
+ "experiments_checked": experiments_checked,
1982
+ }
1983
+ logger.debug("Queue check completed: %s", result)
1984
+ return result