synth-ai 0.2.14__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff shows the contents of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their respective public registries.
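A comparison like the one below can be reproduced locally, since a wheel is just a zip archive whose member list can be read with the Python standard library. The sketch below is illustrative only: it assumes both wheels have already been downloaded (for example via `pip download synth-ai==0.2.14 --no-deps` and `pip download synth-ai==0.4.1 --no-deps`), and the local filenames are assumptions.

```python
# Minimal sketch: list files added or removed between two locally downloaded wheels.
# The wheel paths below are assumed; adjust them to wherever the files were saved.
from zipfile import ZipFile

OLD_WHEEL = "synth_ai-0.2.14-py3-none-any.whl"  # assumed local path
NEW_WHEEL = "synth_ai-0.4.1-py3-none-any.whl"   # assumed local path

with ZipFile(OLD_WHEEL) as old, ZipFile(NEW_WHEEL) as new:
    old_files = set(old.namelist())
    new_files = set(new.namelist())

print("added:")
for name in sorted(new_files - old_files):
    print("  +", name)

print("removed:")
for name in sorted(old_files - new_files):
    print("  -", name)
```

This only reports files added or removed; per-file line counts like those in the listing below would require diffing the extracted file contents as well.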

Potentially problematic release.


This version of synth-ai might be problematic.

Files changed (1091)
  1. synth_ai/__init__.py +19 -40
  2. synth_ai/__main__.py +30 -3
  3. synth_ai/cli/__init__.py +105 -70
  4. synth_ai/cli/__main__.py +42 -0
  5. synth_ai/cli/_internal/__init__.py +5 -0
  6. synth_ai/cli/_internal/modal_wrapper.py +31 -0
  7. synth_ai/cli/_internal/storage.py +20 -0
  8. synth_ai/cli/_internal/typer_patch.py +47 -0
  9. synth_ai/cli/_internal/validate_task_app.py +29 -0
  10. synth_ai/cli/agents/__init__.py +17 -0
  11. synth_ai/cli/agents/claude.py +77 -0
  12. synth_ai/cli/agents/codex.py +265 -0
  13. synth_ai/cli/agents/opencode.py +253 -0
  14. synth_ai/cli/commands/__init__.py +18 -0
  15. synth_ai/cli/commands/artifacts/__init__.py +13 -0
  16. synth_ai/cli/commands/artifacts/client.py +119 -0
  17. synth_ai/cli/commands/artifacts/config.py +57 -0
  18. synth_ai/cli/commands/artifacts/core.py +24 -0
  19. synth_ai/cli/commands/artifacts/download.py +188 -0
  20. synth_ai/cli/commands/artifacts/export.py +186 -0
  21. synth_ai/cli/commands/artifacts/list.py +156 -0
  22. synth_ai/cli/commands/artifacts/parsing.py +250 -0
  23. synth_ai/cli/commands/artifacts/show.py +336 -0
  24. synth_ai/cli/commands/baseline/__init__.py +12 -0
  25. synth_ai/cli/commands/baseline/core.py +636 -0
  26. synth_ai/cli/commands/baseline/list.py +94 -0
  27. synth_ai/cli/commands/demo/__init__.py +3 -0
  28. synth_ai/cli/commands/demo/core.py +153 -0
  29. synth_ai/cli/commands/eval/__init__.py +19 -0
  30. synth_ai/cli/commands/eval/core.py +1113 -0
  31. synth_ai/cli/commands/eval/errors.py +81 -0
  32. synth_ai/cli/commands/eval/validation.py +133 -0
  33. synth_ai/cli/commands/filter/__init__.py +12 -0
  34. synth_ai/cli/commands/filter/core.py +424 -0
  35. synth_ai/cli/commands/filter/errors.py +55 -0
  36. synth_ai/cli/commands/filter/validation.py +77 -0
  37. synth_ai/cli/commands/help/__init__.py +185 -0
  38. synth_ai/cli/commands/help/core.py +72 -0
  39. synth_ai/cli/commands/scan/__init__.py +19 -0
  40. synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
  41. synth_ai/cli/commands/scan/core.py +344 -0
  42. synth_ai/cli/commands/scan/health_checker.py +242 -0
  43. synth_ai/cli/commands/scan/local_scanner.py +278 -0
  44. synth_ai/cli/commands/scan/models.py +83 -0
  45. synth_ai/cli/commands/smoke/__init__.py +7 -0
  46. synth_ai/cli/commands/smoke/core.py +1438 -0
  47. synth_ai/cli/commands/status/__init__.py +66 -0
  48. synth_ai/cli/commands/status/client.py +192 -0
  49. synth_ai/cli/commands/status/config.py +92 -0
  50. synth_ai/cli/commands/status/errors.py +20 -0
  51. synth_ai/cli/commands/status/formatters.py +164 -0
  52. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  53. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  54. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  55. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  56. synth_ai/cli/commands/status/subcommands/pricing.py +23 -0
  57. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  58. synth_ai/cli/commands/status/subcommands/session.py +182 -0
  59. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  60. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  61. synth_ai/cli/commands/status/utils.py +114 -0
  62. synth_ai/cli/commands/train/__init__.py +53 -0
  63. synth_ai/cli/commands/train/core.py +22 -0
  64. synth_ai/cli/commands/train/errors.py +117 -0
  65. synth_ai/cli/commands/train/judge_schemas.py +201 -0
  66. synth_ai/cli/commands/train/judge_validation.py +305 -0
  67. synth_ai/cli/commands/train/prompt_learning_validation.py +633 -0
  68. synth_ai/cli/commands/train/validation.py +392 -0
  69. synth_ai/cli/demo_apps/__init__.py +10 -0
  70. synth_ai/cli/demo_apps/core/__init__.py +28 -0
  71. synth_ai/cli/demo_apps/core/cli.py +1735 -0
  72. synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
  73. synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
  74. synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
  75. synth_ai/cli/demo_apps/demo_registry.py +176 -0
  76. synth_ai/cli/demo_apps/demo_task_apps/core.py +440 -0
  77. synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
  78. synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
  79. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +742 -0
  80. synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
  81. synth_ai/cli/demo_apps/math/__init__.py +1 -0
  82. synth_ai/cli/demo_apps/math/_common.py +16 -0
  83. synth_ai/cli/demo_apps/math/app.py +38 -0
  84. synth_ai/cli/demo_apps/math/config.toml +76 -0
  85. synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
  86. synth_ai/cli/demo_apps/math/modal_task_app.py +702 -0
  87. synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
  88. synth_ai/cli/demo_apps/mipro/main.py +271 -0
  89. synth_ai/cli/demo_apps/mipro/task_app.py +933 -0
  90. synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
  91. synth_ai/cli/demos/__init__.py +12 -0
  92. synth_ai/cli/demos/demo.py +32 -0
  93. synth_ai/cli/demos/rl_demo.py +254 -0
  94. synth_ai/cli/deploy.py +216 -0
  95. synth_ai/cli/infra/__init__.py +14 -0
  96. synth_ai/cli/infra/balance.py +216 -0
  97. synth_ai/cli/infra/mcp.py +35 -0
  98. synth_ai/cli/infra/modal_app.py +36 -0
  99. synth_ai/cli/infra/setup.py +69 -0
  100. synth_ai/cli/infra/status.py +16 -0
  101. synth_ai/cli/infra/turso.py +77 -0
  102. synth_ai/cli/lib/__init__.py +10 -0
  103. synth_ai/cli/lib/agents.py +76 -0
  104. synth_ai/cli/lib/apps/modal_app.py +101 -0
  105. synth_ai/cli/lib/apps/task_app.py +643 -0
  106. synth_ai/cli/lib/bin.py +39 -0
  107. synth_ai/cli/lib/env.py +375 -0
  108. synth_ai/cli/lib/errors.py +85 -0
  109. synth_ai/cli/lib/modal.py +315 -0
  110. synth_ai/cli/lib/plotting.py +126 -0
  111. synth_ai/cli/lib/prompt_args.py +39 -0
  112. synth_ai/cli/lib/prompts.py +284 -0
  113. synth_ai/cli/lib/sqld.py +122 -0
  114. synth_ai/cli/lib/task_app_discovery.py +884 -0
  115. synth_ai/cli/lib/task_app_env.py +295 -0
  116. synth_ai/cli/lib/train_cfgs.py +300 -0
  117. synth_ai/cli/lib/tunnel_records.py +207 -0
  118. synth_ai/cli/local/__init__.py +14 -0
  119. synth_ai/cli/local/experiment_queue/__init__.py +72 -0
  120. synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
  121. synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
  122. synth_ai/cli/local/experiment_queue/config.py +128 -0
  123. synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
  124. synth_ai/cli/local/experiment_queue/database.py +175 -0
  125. synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
  126. synth_ai/cli/local/experiment_queue/models.py +231 -0
  127. synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
  128. synth_ai/cli/local/experiment_queue/results.py +373 -0
  129. synth_ai/cli/local/experiment_queue/schemas.py +131 -0
  130. synth_ai/cli/local/experiment_queue/service.py +344 -0
  131. synth_ai/cli/local/experiment_queue/status.py +372 -0
  132. synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
  133. synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
  134. synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
  135. synth_ai/cli/local/experiment_queue/validation.py +157 -0
  136. synth_ai/cli/local/session/__init__.py +92 -0
  137. synth_ai/cli/local/session/client.py +383 -0
  138. synth_ai/cli/local/session/constants.py +63 -0
  139. synth_ai/cli/local/session/exceptions.py +105 -0
  140. synth_ai/cli/local/session/manager.py +139 -0
  141. synth_ai/cli/local/session/models.py +89 -0
  142. synth_ai/cli/local/session/query.py +110 -0
  143. synth_ai/cli/root.py +30 -6
  144. synth_ai/cli/task_apps/__init__.py +26 -0
  145. synth_ai/cli/task_apps/commands.py +3153 -0
  146. synth_ai/cli/task_apps/deploy.py +7 -0
  147. synth_ai/cli/task_apps/list.py +26 -0
  148. synth_ai/cli/task_apps/main.py +36 -0
  149. synth_ai/cli/task_apps/modal_serve.py +11 -0
  150. synth_ai/cli/task_apps/serve.py +11 -0
  151. synth_ai/cli/training/__init__.py +8 -0
  152. synth_ai/cli/training/train.py +5 -0
  153. synth_ai/cli/training/train_cfg.py +34 -0
  154. synth_ai/cli/training/watch.py +506 -0
  155. synth_ai/cli/turso.py +34 -55
  156. synth_ai/cli/usage.py +159 -0
  157. synth_ai/cli/utils/__init__.py +8 -0
  158. synth_ai/cli/utils/experiments.py +235 -0
  159. synth_ai/cli/utils/queue.py +504 -0
  160. synth_ai/cli/utils/recent.py +133 -0
  161. synth_ai/cli/utils/traces.py +164 -0
  162. synth_ai/contracts/__init__.py +67 -0
  163. synth_ai/core/__init__.py +100 -0
  164. synth_ai/core/_utils/__init__.py +54 -0
  165. synth_ai/core/_utils/base_url.py +10 -0
  166. synth_ai/core/_utils/http.py +10 -0
  167. synth_ai/core/_utils/prompts.py +14 -0
  168. synth_ai/core/_utils/task_app_state.py +12 -0
  169. synth_ai/core/_utils/user_config.py +10 -0
  170. synth_ai/core/apps/common.py +116 -0
  171. synth_ai/core/auth.py +95 -0
  172. synth_ai/core/cfgs.py +240 -0
  173. synth_ai/core/config/__init__.py +16 -0
  174. synth_ai/core/config/base.py +168 -0
  175. synth_ai/core/config/resolver.py +89 -0
  176. synth_ai/core/env.py +220 -0
  177. synth_ai/core/errors.py +126 -0
  178. synth_ai/core/http.py +230 -0
  179. synth_ai/core/integrations/__init__.py +11 -0
  180. synth_ai/core/integrations/cloudflare.py +1710 -0
  181. synth_ai/core/integrations/mcp/__init__.py +6 -0
  182. synth_ai/core/integrations/mcp/__main__.py +8 -0
  183. synth_ai/core/integrations/mcp/claude.py +36 -0
  184. synth_ai/core/integrations/mcp/main.py +254 -0
  185. synth_ai/core/integrations/mcp/setup.py +100 -0
  186. synth_ai/core/integrations/modal.py +277 -0
  187. synth_ai/core/json.py +72 -0
  188. synth_ai/core/log_filter.py +99 -0
  189. synth_ai/core/logging.py +82 -0
  190. synth_ai/core/paths.py +107 -0
  191. synth_ai/core/pricing.py +109 -0
  192. synth_ai/core/process.py +233 -0
  193. synth_ai/core/ssl.py +25 -0
  194. synth_ai/core/storage/__init__.py +71 -0
  195. synth_ai/core/task_app_state.py +318 -0
  196. synth_ai/core/telemetry.py +282 -0
  197. synth_ai/core/tracing_v3/__init__.py +99 -0
  198. synth_ai/core/tracing_v3/abstractions.py +302 -0
  199. synth_ai/core/tracing_v3/config.py +229 -0
  200. synth_ai/core/tracing_v3/constants.py +21 -0
  201. synth_ai/core/tracing_v3/db_config.py +182 -0
  202. synth_ai/core/tracing_v3/decorators.py +401 -0
  203. synth_ai/core/tracing_v3/llm_call_record_helpers.py +437 -0
  204. synth_ai/core/tracing_v3/migration_helper.py +119 -0
  205. synth_ai/core/tracing_v3/session_tracer.py +542 -0
  206. synth_ai/core/tracing_v3/storage/base.py +211 -0
  207. synth_ai/core/tracing_v3/storage/config.py +109 -0
  208. synth_ai/core/tracing_v3/storage/factory.py +39 -0
  209. synth_ai/core/tracing_v3/trace_utils.py +326 -0
  210. synth_ai/core/tracing_v3/turso/daemon.py +278 -0
  211. synth_ai/core/tracing_v3/turso/models.py +470 -0
  212. synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
  213. synth_ai/core/tracing_v3/utils.py +108 -0
  214. synth_ai/core/urls.py +18 -0
  215. synth_ai/core/user_config.py +137 -0
  216. synth_ai/core/uvicorn.py +222 -0
  217. synth_ai/data/__init__.py +110 -0
  218. synth_ai/data/enums.py +141 -0
  219. synth_ai/data/rewards.py +152 -0
  220. synth_ai/data/specs.py +36 -0
  221. synth_ai/data/traces.py +35 -0
  222. synth_ai/products/__init__.py +6 -0
  223. synth_ai/products/graph_evolve/__init__.py +46 -0
  224. synth_ai/products/graph_evolve/client.py +226 -0
  225. synth_ai/products/graph_evolve/config.py +591 -0
  226. synth_ai/products/graph_evolve/converters/__init__.py +42 -0
  227. synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
  228. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
  229. synth_ai/products/graph_evolve/run.py +222 -0
  230. synth_ai/sdk/__init__.py +119 -0
  231. synth_ai/sdk/api/__init__.py +1 -0
  232. synth_ai/sdk/api/models/supported.py +514 -0
  233. synth_ai/sdk/api/research_agent/__init__.py +86 -0
  234. synth_ai/sdk/api/research_agent/cli.py +428 -0
  235. synth_ai/sdk/api/research_agent/config.py +357 -0
  236. synth_ai/sdk/api/research_agent/job.py +717 -0
  237. synth_ai/sdk/api/train/__init__.py +85 -0
  238. synth_ai/sdk/api/train/builders.py +895 -0
  239. synth_ai/sdk/api/train/cli.py +2188 -0
  240. synth_ai/sdk/api/train/config_finder.py +267 -0
  241. synth_ai/sdk/api/train/configs/__init__.py +65 -0
  242. synth_ai/sdk/api/train/configs/prompt_learning.py +1706 -0
  243. synth_ai/sdk/api/train/configs/rl.py +188 -0
  244. synth_ai/sdk/api/train/configs/sft.py +99 -0
  245. synth_ai/sdk/api/train/configs/shared.py +81 -0
  246. synth_ai/sdk/api/train/context_learning.py +312 -0
  247. synth_ai/sdk/api/train/env_resolver.py +418 -0
  248. synth_ai/sdk/api/train/graph_validators.py +216 -0
  249. synth_ai/sdk/api/train/graphgen.py +984 -0
  250. synth_ai/sdk/api/train/graphgen_models.py +823 -0
  251. synth_ai/sdk/api/train/graphgen_validators.py +109 -0
  252. synth_ai/sdk/api/train/pollers.py +124 -0
  253. synth_ai/sdk/api/train/progress/__init__.py +97 -0
  254. synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
  255. synth_ai/sdk/api/train/progress/events.py +326 -0
  256. synth_ai/sdk/api/train/progress/results.py +428 -0
  257. synth_ai/sdk/api/train/progress/tracker.py +641 -0
  258. synth_ai/sdk/api/train/prompt_learning.py +470 -0
  259. synth_ai/sdk/api/train/rl.py +442 -0
  260. synth_ai/sdk/api/train/sft.py +396 -0
  261. synth_ai/sdk/api/train/summary.py +522 -0
  262. synth_ai/sdk/api/train/supported_algos.py +147 -0
  263. synth_ai/sdk/api/train/task_app.py +331 -0
  264. synth_ai/sdk/api/train/utils.py +279 -0
  265. synth_ai/sdk/api/train/validators.py +2424 -0
  266. synth_ai/sdk/baseline/__init__.py +25 -0
  267. synth_ai/sdk/baseline/config.py +209 -0
  268. synth_ai/sdk/baseline/discovery.py +216 -0
  269. synth_ai/sdk/baseline/execution.py +154 -0
  270. synth_ai/sdk/graphs/__init__.py +15 -0
  271. synth_ai/sdk/graphs/completions.py +570 -0
  272. synth_ai/sdk/inference/__init__.py +6 -0
  273. synth_ai/sdk/inference/client.py +128 -0
  274. synth_ai/sdk/jobs/__init__.py +16 -0
  275. synth_ai/sdk/jobs/client.py +371 -0
  276. synth_ai/sdk/judging/__init__.py +15 -0
  277. synth_ai/sdk/judging/base.py +24 -0
  278. synth_ai/sdk/judging/client.py +191 -0
  279. synth_ai/sdk/judging/schemas.py +222 -0
  280. synth_ai/sdk/learning/__init__.py +69 -0
  281. synth_ai/sdk/learning/client.py +240 -0
  282. synth_ai/sdk/learning/ft_client.py +7 -0
  283. synth_ai/sdk/learning/health.py +49 -0
  284. synth_ai/sdk/learning/jobs.py +202 -0
  285. synth_ai/sdk/learning/prompt_extraction.py +334 -0
  286. synth_ai/sdk/learning/prompt_learning_client.py +455 -0
  287. synth_ai/sdk/learning/prompt_learning_types.py +185 -0
  288. synth_ai/sdk/learning/rl/client.py +268 -0
  289. synth_ai/sdk/learning/rl/contracts.py +27 -0
  290. synth_ai/sdk/learning/rl/env_keys.py +166 -0
  291. synth_ai/sdk/learning/rl/secrets.py +13 -0
  292. synth_ai/sdk/learning/sft/client.py +95 -0
  293. synth_ai/sdk/learning/sft/config.py +270 -0
  294. synth_ai/sdk/learning/sft/data.py +698 -0
  295. synth_ai/sdk/learning/validators.py +52 -0
  296. synth_ai/sdk/research_agent/__init__.py +34 -0
  297. synth_ai/sdk/research_agent/container_builder.py +328 -0
  298. synth_ai/sdk/research_agent/container_spec.py +198 -0
  299. synth_ai/sdk/research_agent/defaults.py +34 -0
  300. synth_ai/sdk/research_agent/results_collector.py +69 -0
  301. synth_ai/sdk/specs/__init__.py +46 -0
  302. synth_ai/sdk/specs/dataclasses.py +149 -0
  303. synth_ai/sdk/specs/loader.py +144 -0
  304. synth_ai/sdk/specs/serializer.py +199 -0
  305. synth_ai/sdk/specs/validation.py +250 -0
  306. synth_ai/sdk/streaming/__init__.py +35 -0
  307. synth_ai/sdk/streaming/config.py +94 -0
  308. synth_ai/sdk/streaming/handlers.py +1997 -0
  309. synth_ai/sdk/streaming/streamer.py +704 -0
  310. synth_ai/sdk/streaming/types.py +112 -0
  311. synth_ai/sdk/task/__init__.py +151 -0
  312. synth_ai/sdk/task/apps/__init__.py +133 -0
  313. synth_ai/sdk/task/config.py +261 -0
  314. synth_ai/sdk/task/contracts.py +298 -0
  315. synth_ai/sdk/task/datasets.py +108 -0
  316. synth_ai/sdk/task/in_process.py +1190 -0
  317. synth_ai/sdk/task/in_process_runner.py +309 -0
  318. synth_ai/sdk/task/inference_api.py +299 -0
  319. synth_ai/sdk/task/proxy.py +287 -0
  320. synth_ai/sdk/task/rubrics/__init__.py +55 -0
  321. synth_ai/sdk/task/rubrics/loaders.py +156 -0
  322. synth_ai/sdk/task/rubrics.py +219 -0
  323. synth_ai/sdk/task/server.py +580 -0
  324. synth_ai/sdk/task/trace_correlation_helpers.py +506 -0
  325. synth_ai/sdk/task/tracing_utils.py +95 -0
  326. synth_ai/sdk/task/validators.py +456 -0
  327. synth_ai/sdk/tracing/__init__.py +39 -0
  328. synth_ai/sdk/training/__init__.py +102 -0
  329. synth_ai/sdk/usage/__init__.py +37 -0
  330. synth_ai/sdk/usage/client.py +171 -0
  331. synth_ai/sdk/usage/models.py +261 -0
  332. synth_ai/utils/__init__.py +213 -0
  333. synth_ai-0.4.1.dist-info/METADATA +195 -0
  334. synth_ai-0.4.1.dist-info/RECORD +379 -0
  335. synth_ai-0.4.1.dist-info/top_level.txt +1 -0
  336. examples/__init__.py +0 -16
  337. examples/analyze_semantic_words.sh +0 -17
  338. examples/crafter_debug_render.py +0 -186
  339. examples/dev/qwen3_32b_qlora_4xh100.toml +0 -40
  340. examples/multi_step/configs/README_verilog_rl.md +0 -77
  341. examples/multi_step/configs/VERILOG_REWARDS.md +0 -90
  342. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +0 -183
  343. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +0 -35
  344. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +0 -36
  345. examples/multi_step/configs/crafter_rl_outcome.toml +0 -74
  346. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +0 -187
  347. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +0 -83
  348. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +0 -78
  349. examples/multi_step/configs/crafter_synth_backend.md +0 -40
  350. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +0 -31
  351. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +0 -33
  352. examples/multi_step/configs/verilog_rl_lora.toml +0 -190
  353. examples/multi_step/crafter_rl_lora.md +0 -70
  354. examples/multi_step/judges/crafter_backend_judge.py +0 -220
  355. examples/multi_step/judges/verilog_backend_judge.py +0 -234
  356. examples/multi_step/readme.md +0 -48
  357. examples/multi_step/sse_metrics_streaming_notes.md +0 -357
  358. examples/multi_step/task_app_config_notes.md +0 -494
  359. examples/multi_step/verilog_rl_lora.md +0 -218
  360. examples/qwen_coder/README.md +0 -102
  361. examples/qwen_coder/_shared.py +0 -113
  362. examples/qwen_coder/configs/coder_lora_30b.toml +0 -61
  363. examples/qwen_coder/configs/coder_lora_4b.toml +0 -57
  364. examples/qwen_coder/configs/coder_lora_small.toml +0 -58
  365. examples/qwen_coder/generate_dataset.py +0 -98
  366. examples/qwen_coder/infer_ft_smoke.py +0 -65
  367. examples/qwen_coder/infer_prod_proxy.py +0 -73
  368. examples/qwen_coder/infer_via_synth.py +0 -87
  369. examples/qwen_coder/scripts/infer_coder.sh +0 -19
  370. examples/qwen_coder/scripts/train_coder_30b.sh +0 -22
  371. examples/qwen_coder/sft_full_17b.py +0 -103
  372. examples/qwen_coder/sft_lora_30b.py +0 -110
  373. examples/qwen_coder/subset_jsonl.py +0 -39
  374. examples/qwen_coder/todos.md +0 -38
  375. examples/qwen_coder/validate_jsonl.py +0 -60
  376. examples/rl/README.md +0 -169
  377. examples/rl/download_dataset.py +0 -80
  378. examples/run_crafter_demo.sh +0 -10
  379. examples/sft/README.md +0 -139
  380. examples/sft/configs/crafter_fft_qwen0p6b.toml +0 -44
  381. examples/sft/configs/crafter_lora_qwen0p6b.toml +0 -45
  382. examples/sft/evaluate.py +0 -119
  383. examples/sft/export_dataset.py +0 -117
  384. examples/sft/generate_traces.py +0 -164
  385. examples/swe/__init__.py +0 -12
  386. examples/swe/task_app/README.md +0 -105
  387. examples/swe/task_app/__init__.py +0 -2
  388. examples/swe/task_app/grpo_swe_mini.py +0 -601
  389. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -136
  390. examples/swe/task_app/hosted/README.md +0 -173
  391. examples/swe/task_app/hosted/__init__.py +0 -5
  392. examples/swe/task_app/hosted/branching.py +0 -143
  393. examples/swe/task_app/hosted/environment_routes.py +0 -1289
  394. examples/swe/task_app/hosted/envs/__init__.py +0 -1
  395. examples/swe/task_app/hosted/envs/crafter/__init__.py +0 -6
  396. examples/swe/task_app/hosted/envs/crafter/app.py +0 -1
  397. examples/swe/task_app/hosted/envs/crafter/environment.py +0 -522
  398. examples/swe/task_app/hosted/envs/crafter/policy.py +0 -478
  399. examples/swe/task_app/hosted/envs/crafter/react_agent.py +0 -108
  400. examples/swe/task_app/hosted/envs/crafter/shared.py +0 -305
  401. examples/swe/task_app/hosted/envs/crafter/tools.py +0 -47
  402. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +0 -8
  403. examples/swe/task_app/hosted/envs/mini_swe/environment.py +0 -1164
  404. examples/swe/task_app/hosted/envs/mini_swe/policy.py +0 -355
  405. examples/swe/task_app/hosted/envs/mini_swe/shared.py +0 -83
  406. examples/swe/task_app/hosted/envs/mini_swe/tools.py +0 -96
  407. examples/swe/task_app/hosted/hosted_app.py +0 -204
  408. examples/swe/task_app/hosted/inference/__init__.py +0 -5
  409. examples/swe/task_app/hosted/inference/openai_client.py +0 -618
  410. examples/swe/task_app/hosted/main.py +0 -100
  411. examples/swe/task_app/hosted/policy_routes.py +0 -1079
  412. examples/swe/task_app/hosted/registry.py +0 -195
  413. examples/swe/task_app/hosted/rollout.py +0 -1911
  414. examples/swe/task_app/hosted/storage/__init__.py +0 -5
  415. examples/swe/task_app/hosted/storage/volume.py +0 -211
  416. examples/swe/task_app/hosted/test_agents.py +0 -161
  417. examples/swe/task_app/hosted/test_service.py +0 -136
  418. examples/swe/task_app/hosted/utils.py +0 -62
  419. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +0 -258
  420. examples/task_apps/TESTING.md +0 -275
  421. examples/task_apps/crafter/CREATE_SFT_DATASET.md +0 -273
  422. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +0 -152
  423. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +0 -174
  424. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +0 -268
  425. examples/task_apps/crafter/QUERY_EXAMPLES.md +0 -203
  426. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +0 -316
  427. examples/task_apps/crafter/__init__.py +0 -0
  428. examples/task_apps/crafter/eval_image_only_gpt4o.toml +0 -28
  429. examples/task_apps/crafter/eval_text_only_groq_llama.toml +0 -36
  430. examples/task_apps/crafter/filter_sft_dataset.toml +0 -16
  431. examples/task_apps/crafter/task_app/README.md +0 -42
  432. examples/task_apps/crafter/task_app/__init__.py +0 -5
  433. examples/task_apps/crafter/task_app/grpo_crafter.py +0 -973
  434. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +0 -146
  435. examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +0 -173
  436. examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +0 -5
  437. examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +0 -143
  438. examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +0 -1226
  439. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +0 -1
  440. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -6
  441. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +0 -1
  442. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -532
  443. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +0 -547
  444. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -123
  445. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -305
  446. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -47
  447. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +0 -204
  448. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +0 -5
  449. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +0 -704
  450. examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +0 -100
  451. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +0 -1152
  452. examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +0 -195
  453. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +0 -2160
  454. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +0 -5
  455. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +0 -211
  456. examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +0 -161
  457. examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +0 -136
  458. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +0 -218
  459. examples/task_apps/dev/pokemon_emerald/__init__.py +0 -2
  460. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +0 -811
  461. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +0 -120
  462. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +0 -160
  463. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +0 -155
  464. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +0 -69
  465. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +0 -96
  466. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +0 -1502
  467. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +0 -4
  468. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +0 -68
  469. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +0 -216
  470. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +0 -35
  471. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +0 -631
  472. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +0 -1544
  473. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +0 -1428
  474. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +0 -4848
  475. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +0 -41
  476. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +0 -298
  477. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +0 -95
  478. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +0 -204
  479. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  480. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +0 -2152
  481. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +0 -429
  482. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +0 -155
  483. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +0 -78
  484. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  485. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +0 -122
  486. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +0 -76
  487. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +0 -413
  488. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +0 -204
  489. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +0 -133
  490. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +0 -229
  491. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +0 -300
  492. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +0 -205
  493. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +0 -200
  494. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +0 -284
  495. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +0 -468
  496. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +0 -575
  497. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +0 -311
  498. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +0 -259
  499. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  500. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +0 -372
  501. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +0 -296
  502. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +0 -275
  503. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +0 -22
  504. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +0 -44
  505. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +0 -514
  506. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +0 -415
  507. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +0 -1763
  508. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +0 -33
  509. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +0 -106
  510. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +0 -334
  511. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +0 -1020
  512. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +0 -188
  513. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +0 -1481
  514. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +0 -862
  515. examples/task_apps/dev/pokemon_emerald/modal_app.py +0 -114
  516. examples/task_apps/dev/pokemon_emerald/task_app/README.md +0 -81
  517. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +0 -6
  518. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +0 -685
  519. examples/task_apps/enron/__init__.py +0 -1
  520. examples/task_apps/enron/eval_groq_qwen32.toml +0 -16
  521. examples/task_apps/enron/filter_sft.toml +0 -5
  522. examples/task_apps/enron/task_app/README.md +0 -14
  523. examples/task_apps/enron/task_app/__init__.py +0 -1
  524. examples/task_apps/enron/task_app/grpo_enron.py +0 -906
  525. examples/task_apps/enron/task_app/grpo_enron_task_app.py +0 -146
  526. examples/task_apps/enron/tests/__init__.py +0 -4
  527. examples/task_apps/enron/tests/conftest.py +0 -115
  528. examples/task_apps/enron/tests/integration/__init__.py +0 -4
  529. examples/task_apps/enron/tests/integration/test_enron_eval.py +0 -179
  530. examples/task_apps/enron/tests/integration/test_enron_rollout.py +0 -135
  531. examples/task_apps/enron/tests/unit/__init__.py +0 -4
  532. examples/task_apps/enron/tests/unit/test_enron_environment.py +0 -126
  533. examples/task_apps/math/README.md +0 -22
  534. examples/task_apps/math/__init__.py +0 -0
  535. examples/task_apps/math/math_single_step.py +0 -1000
  536. examples/task_apps/math/math_task_app.py +0 -115
  537. examples/task_apps/pokemon_battle/__init__.py +0 -2
  538. examples/task_apps/pokemon_battle/modal_app.py +0 -104
  539. examples/task_apps/pokemon_battle/task_app/README.md +0 -68
  540. examples/task_apps/pokemon_battle/task_app/__init__.py +0 -6
  541. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +0 -932
  542. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +0 -283
  543. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +0 -155
  544. examples/task_apps/pokemon_red/README.md +0 -357
  545. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +0 -415
  546. examples/task_apps/pokemon_red/__init__.py +0 -3
  547. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +0 -29
  548. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +0 -225
  549. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +0 -75
  550. examples/task_apps/pokemon_red/task_app.py +0 -799
  551. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +0 -193
  552. examples/task_apps/sokoban/README.md +0 -307
  553. examples/task_apps/sokoban/__init__.py +0 -3
  554. examples/task_apps/sokoban/eval_groq_qwen32.toml +0 -16
  555. examples/task_apps/sokoban/eval_openai_gpt5.toml +0 -16
  556. examples/task_apps/sokoban/filter_sft.toml +0 -5
  557. examples/task_apps/sokoban/task_app.py +0 -1058
  558. examples/task_apps/sokoban/tests/__init__.py +0 -4
  559. examples/task_apps/sokoban/tests/conftest.py +0 -113
  560. examples/task_apps/sokoban/tests/integration/__init__.py +0 -4
  561. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +0 -57
  562. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +0 -198
  563. examples/task_apps/sokoban/tests/unit/__init__.py +0 -4
  564. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +0 -114
  565. examples/task_apps/verilog/__init__.py +0 -1
  566. examples/task_apps/verilog/eval_groq_qwen32b.toml +0 -24
  567. examples/task_apps/verilog/filter_sft.toml +0 -5
  568. examples/task_apps/verilog/task_app/README.md +0 -12
  569. examples/task_apps/verilog/task_app/__init__.py +0 -1
  570. examples/task_apps/verilog/task_app/grpo_verilog.py +0 -1166
  571. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +0 -145
  572. examples/task_apps/verilog/tests/__init__.py +0 -4
  573. examples/task_apps/verilog/tests/conftest.py +0 -115
  574. examples/task_apps/verilog/tests/integration/__init__.py +0 -4
  575. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +0 -181
  576. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +0 -55
  577. examples/task_apps/verilog/tests/unit/__init__.py +0 -4
  578. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +0 -118
  579. examples/vlm/PROPOSAL.md +0 -53
  580. examples/vlm/README.md +0 -68
  581. examples/vlm/configs/crafter_vlm_gpt4o.toml +0 -44
  582. examples/vlm/crafter_image_only_agent.py +0 -207
  583. examples/vlm/crafter_openai_vlm_agent.py +0 -277
  584. examples/vlm/filter_image_rows.py +0 -63
  585. examples/vlm/run_crafter_vlm_benchmark.py +0 -316
  586. examples/warming_up_to_rl/analyze_trace_db.py +0 -422
  587. examples/warming_up_to_rl/configs/crafter_fft.toml +0 -48
  588. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -54
  589. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +0 -20
  590. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +0 -13
  591. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +0 -23
  592. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +0 -35
  593. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +0 -26
  594. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +0 -36
  595. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +0 -32
  596. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +0 -83
  597. examples/warming_up_to_rl/configs/rl_from_ft.toml +0 -56
  598. examples/warming_up_to_rl/export_trace_sft.py +0 -723
  599. examples/warming_up_to_rl/groq_test.py +0 -97
  600. examples/warming_up_to_rl/manage_secrets.py +0 -131
  601. examples/warming_up_to_rl/old/event_rewards.md +0 -234
  602. examples/warming_up_to_rl/old/notes.md +0 -73
  603. examples/warming_up_to_rl/readme.md +0 -179
  604. examples/warming_up_to_rl/run_eval.py +0 -736
  605. examples/warming_up_to_rl/run_fft_and_save.py +0 -380
  606. examples/warming_up_to_rl/run_local_rollout.py +0 -239
  607. examples/warming_up_to_rl/run_local_rollout_modal.py +0 -248
  608. examples/warming_up_to_rl/run_local_rollout_parallel.py +0 -405
  609. examples/warming_up_to_rl/run_local_rollout_traced.py +0 -477
  610. examples/warming_up_to_rl/run_rl_and_save.py +0 -124
  611. examples/warming_up_to_rl/run_rollout_remote.py +0 -156
  612. examples/workflows/__init__.py +0 -0
  613. examples/workflows/math_rl/__init__.py +0 -0
  614. examples/workflows/math_rl/configs/eval_base_qwen.toml +0 -15
  615. examples/workflows/math_rl/configs/eval_rl_qwen.toml +0 -11
  616. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +0 -35
  617. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +0 -74
  618. examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +0 -35
  619. examples/workflows/math_rl/download_dataset.py +0 -80
  620. examples/workflows/math_rl/run_eval.py +0 -436
  621. examples/workflows/math_rl/run_rl_and_save.py +0 -111
  622. synth_ai/api/models/supported.py +0 -377
  623. synth_ai/api/train/__init__.py +0 -5
  624. synth_ai/api/train/builders.py +0 -351
  625. synth_ai/api/train/cli.py +0 -635
  626. synth_ai/api/train/config_finder.py +0 -228
  627. synth_ai/api/train/configs/__init__.py +0 -44
  628. synth_ai/api/train/configs/rl.py +0 -134
  629. synth_ai/api/train/configs/sft.py +0 -95
  630. synth_ai/api/train/configs/shared.py +0 -24
  631. synth_ai/api/train/env_resolver.py +0 -349
  632. synth_ai/api/train/pollers.py +0 -75
  633. synth_ai/api/train/supported_algos.py +0 -147
  634. synth_ai/api/train/task_app.py +0 -195
  635. synth_ai/api/train/utils.py +0 -225
  636. synth_ai/cli/_modal_wrapper.py +0 -29
  637. synth_ai/cli/_storage.py +0 -20
  638. synth_ai/cli/_typer_patch.py +0 -49
  639. synth_ai/cli/_validate_task_app.py +0 -11
  640. synth_ai/cli/balance.py +0 -216
  641. synth_ai/cli/calc.py +0 -84
  642. synth_ai/cli/demo.py +0 -165
  643. synth_ai/cli/legacy_root_backup.py +0 -468
  644. synth_ai/cli/man.py +0 -106
  645. synth_ai/cli/recent.py +0 -132
  646. synth_ai/cli/rl_demo.py +0 -254
  647. synth_ai/cli/status.py +0 -134
  648. synth_ai/cli/task_apps.py +0 -4523
  649. synth_ai/cli/traces.py +0 -164
  650. synth_ai/cli/tui.py +0 -57
  651. synth_ai/cli/watch.py +0 -506
  652. synth_ai/compound/cais.py +0 -0
  653. synth_ai/config/base_url.py +0 -107
  654. synth_ai/core/experiment.py +0 -13
  655. synth_ai/core/system.py +0 -15
  656. synth_ai/demo_registry.py +0 -295
  657. synth_ai/demos/core/__init__.py +0 -1
  658. synth_ai/demos/core/cli.py +0 -1718
  659. synth_ai/demos/demo_task_apps/core.py +0 -440
  660. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +0 -184
  661. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
  662. synth_ai/demos/demo_task_apps/math/modal_task_app.py +0 -739
  663. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -37
  664. synth_ai/environments/__init__.py +0 -31
  665. synth_ai/environments/environment/__init__.py +0 -1
  666. synth_ai/environments/environment/artifacts/__init__.py +0 -1
  667. synth_ai/environments/environment/artifacts/base.py +0 -52
  668. synth_ai/environments/environment/core.py +0 -67
  669. synth_ai/environments/environment/db/__init__.py +0 -1
  670. synth_ai/environments/environment/db/sqlite.py +0 -45
  671. synth_ai/environments/environment/registry.py +0 -233
  672. synth_ai/environments/environment/resources/sqlite.py +0 -45
  673. synth_ai/environments/environment/results.py +0 -1
  674. synth_ai/environments/environment/rewards/__init__.py +0 -1
  675. synth_ai/environments/environment/rewards/core.py +0 -29
  676. synth_ai/environments/environment/shared_engine.py +0 -26
  677. synth_ai/environments/environment/tools/__init__.py +0 -200
  678. synth_ai/environments/examples/__init__.py +0 -1
  679. synth_ai/environments/examples/bandit/__init__.py +0 -33
  680. synth_ai/environments/examples/bandit/engine.py +0 -302
  681. synth_ai/environments/examples/bandit/environment.py +0 -194
  682. synth_ai/environments/examples/bandit/taskset.py +0 -200
  683. synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
  684. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
  685. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
  686. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
  687. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
  688. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
  689. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
  690. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
  691. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
  692. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
  693. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
  694. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
  695. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
  696. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
  697. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
  698. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
  699. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
  700. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
  701. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
  702. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
  703. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
  704. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
  705. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
  706. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
  707. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
  708. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
  709. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
  710. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
  711. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
  712. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
  713. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
  714. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
  715. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
  716. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
  717. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
  718. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
  719. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
  720. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
  721. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
  722. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
  723. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
  724. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
  725. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
  726. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
  727. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
  728. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
  729. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
  730. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
  731. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
  732. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
  733. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
  734. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
  735. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
  736. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
  737. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
  738. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
  739. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
  740. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
  741. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
  742. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
  743. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
  744. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
  745. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
  746. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
  747. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
  748. synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
  749. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  750. synth_ai/environments/examples/crafter_classic/engine.py +0 -579
  751. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
  752. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
  753. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
  754. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
  755. synth_ai/environments/examples/crafter_classic/environment.py +0 -495
  756. synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
  757. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
  758. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
  759. synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
  760. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
  761. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
  762. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
  763. synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
  764. synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
  765. synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
  766. synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
  767. synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
  768. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
  769. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
  770. synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
  771. synth_ai/environments/examples/crafter_custom/environment.py +0 -312
  772. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
  773. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
  774. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
  775. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
  776. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
  777. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
  778. synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
  779. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
  780. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
  781. synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
  782. synth_ai/environments/examples/enron/engine.py +0 -300
  783. synth_ai/environments/examples/enron/environment.py +0 -234
  784. synth_ai/environments/examples/enron/taskset.py +0 -112
  785. synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
  786. synth_ai/environments/examples/minigrid/__init__.py +0 -48
  787. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
  788. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
  789. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
  790. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
  791. synth_ai/environments/examples/minigrid/engine.py +0 -589
  792. synth_ai/environments/examples/minigrid/environment.py +0 -274
  793. synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
  794. synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
  795. synth_ai/environments/examples/minigrid/taskset.py +0 -583
  796. synth_ai/environments/examples/nethack/__init__.py +0 -7
  797. synth_ai/environments/examples/nethack/achievements.py +0 -337
  798. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
  799. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
  800. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
  801. synth_ai/environments/examples/nethack/engine.py +0 -739
  802. synth_ai/environments/examples/nethack/environment.py +0 -256
  803. synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
  804. synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
  805. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
  806. synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
  807. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
  808. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
  809. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
  810. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
  811. synth_ai/environments/examples/nethack/taskset.py +0 -323
  812. synth_ai/environments/examples/red/__init__.py +0 -7
  813. synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
  814. synth_ai/environments/examples/red/config_logging.py +0 -110
  815. synth_ai/environments/examples/red/engine.py +0 -721
  816. synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
  817. synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -35
  818. synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
  819. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
  820. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
  821. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
  822. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
  823. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
  824. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
  825. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
  826. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
  827. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +0 -477
  828. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
  829. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
  830. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
  831. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
  832. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
  833. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -172
  834. synth_ai/environments/examples/red/environment.py +0 -298
  835. synth_ai/environments/examples/red/taskset.py +0 -79
  836. synth_ai/environments/examples/red/units/__init__.py +0 -1
  837. synth_ai/environments/examples/sokoban/__init__.py +0 -1
  838. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
  839. synth_ai/environments/examples/sokoban/engine.py +0 -678
  840. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
  841. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
  842. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
  843. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
  844. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
  845. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
  846. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
  847. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
  848. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
  849. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
  850. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
  851. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
  852. synth_ai/environments/examples/sokoban/environment.py +0 -229
  853. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
  854. synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
  855. synth_ai/environments/examples/sokoban/taskset.py +0 -544
  856. synth_ai/environments/examples/tictactoe/__init__.py +0 -1
  857. synth_ai/environments/examples/tictactoe/engine.py +0 -368
  858. synth_ai/environments/examples/tictactoe/environment.py +0 -240
  859. synth_ai/environments/examples/tictactoe/taskset.py +0 -215
  860. synth_ai/environments/examples/verilog/__init__.py +0 -10
  861. synth_ai/environments/examples/verilog/engine.py +0 -421
  862. synth_ai/environments/examples/verilog/environment.py +0 -350
  863. synth_ai/environments/examples/verilog/taskset.py +0 -420
  864. synth_ai/environments/examples/wordle/__init__.py +0 -29
  865. synth_ai/environments/examples/wordle/engine.py +0 -398
  866. synth_ai/environments/examples/wordle/environment.py +0 -159
  867. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
  868. synth_ai/environments/examples/wordle/taskset.py +0 -230
  869. synth_ai/environments/reproducibility/core.py +0 -42
  870. synth_ai/environments/reproducibility/helpers.py +0 -0
  871. synth_ai/environments/reproducibility/tree.py +0 -363
  872. synth_ai/environments/service/app.py +0 -97
  873. synth_ai/environments/service/core_routes.py +0 -1021
  874. synth_ai/environments/service/external_registry.py +0 -56
  875. synth_ai/environments/service/registry.py +0 -9
  876. synth_ai/environments/stateful/__init__.py +0 -1
  877. synth_ai/environments/stateful/core.py +0 -163
  878. synth_ai/environments/stateful/engine.py +0 -21
  879. synth_ai/environments/stateful/state.py +0 -7
  880. synth_ai/environments/tasks/api.py +0 -19
  881. synth_ai/environments/tasks/core.py +0 -81
  882. synth_ai/environments/tasks/filters.py +0 -40
  883. synth_ai/environments/tasks/utils.py +0 -90
  884. synth_ai/environments/v0_observability/history.py +0 -3
  885. synth_ai/environments/v0_observability/log.py +0 -2
  886. synth_ai/evals/__init__.py +0 -15
  887. synth_ai/evals/base.py +0 -13
  888. synth_ai/evals/client.py +0 -82
  889. synth_ai/handshake.py +0 -109
  890. synth_ai/http.py +0 -26
  891. synth_ai/http_client.py +0 -136
  892. synth_ai/inference/__init__.py +0 -5
  893. synth_ai/inference/client.py +0 -34
  894. synth_ai/jobs/client.py +0 -295
  895. synth_ai/judge_schemas.py +0 -127
  896. synth_ai/learning/__init__.py +0 -59
  897. synth_ai/learning/client.py +0 -241
  898. synth_ai/learning/ft_client.py +0 -7
  899. synth_ai/learning/health.py +0 -49
  900. synth_ai/learning/jobs.py +0 -201
  901. synth_ai/learning/rl/client.py +0 -267
  902. synth_ai/learning/rl/contracts.py +0 -27
  903. synth_ai/learning/rl/env_keys.py +0 -166
  904. synth_ai/learning/rl/secrets.py +0 -13
  905. synth_ai/learning/sft/client.py +0 -68
  906. synth_ai/learning/sft/config.py +0 -270
  907. synth_ai/learning/sft/data.py +0 -295
  908. synth_ai/learning/validators.py +0 -49
  909. synth_ai/lm/__init__.py +0 -25
  910. synth_ai/task/__init__.py +0 -121
  911. synth_ai/task/apps/__init__.py +0 -129
  912. synth_ai/task/config.py +0 -257
  913. synth_ai/task/contracts.py +0 -236
  914. synth_ai/task/datasets.py +0 -108
  915. synth_ai/task/proxy.py +0 -251
  916. synth_ai/task/rubrics/__init__.py +0 -56
  917. synth_ai/task/rubrics/loaders.py +0 -152
  918. synth_ai/task/server.py +0 -432
  919. synth_ai/task/trace_correlation_helpers.py +0 -315
  920. synth_ai/task/tracing_utils.py +0 -84
  921. synth_ai/task/validators.py +0 -418
  922. synth_ai/tracing_v3/__init__.py +0 -97
  923. synth_ai/tracing_v3/abstractions.py +0 -302
  924. synth_ai/tracing_v3/config.py +0 -84
  925. synth_ai/tracing_v3/db_config.py +0 -194
  926. synth_ai/tracing_v3/decorators.py +0 -398
  927. synth_ai/tracing_v3/llm_call_record_helpers.py +0 -391
  928. synth_ai/tracing_v3/migration_helper.py +0 -120
  929. synth_ai/tracing_v3/session_tracer.py +0 -540
  930. synth_ai/tracing_v3/storage/base.py +0 -210
  931. synth_ai/tracing_v3/storage/config.py +0 -75
  932. synth_ai/tracing_v3/storage/factory.py +0 -39
  933. synth_ai/tracing_v3/trace_utils.py +0 -317
  934. synth_ai/tracing_v3/turso/daemon.py +0 -151
  935. synth_ai/tracing_v3/turso/models.py +0 -469
  936. synth_ai/tracing_v3/turso/native_manager.py +0 -1209
  937. synth_ai/tracing_v3/utils.py +0 -108
  938. synth_ai/tui/__init__.py +0 -5
  939. synth_ai/tui/__main__.py +0 -13
  940. synth_ai/tui/cli/__init__.py +0 -1
  941. synth_ai/tui/cli/query_experiments.py +0 -164
  942. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  943. synth_ai/tui/dashboard.py +0 -906
  944. synth_ai/v0/api/__init__.py +0 -8
  945. synth_ai/v0/api/models/__init__.py +0 -8
  946. synth_ai/v0/api/models/supported.py +0 -8
  947. synth_ai/v0/config/__init__.py +0 -15
  948. synth_ai/v0/config/base_url.py +0 -12
  949. synth_ai/v0/lm/__init__.py +0 -51
  950. synth_ai/v0/lm/caching/__init__.py +0 -0
  951. synth_ai/v0/lm/caching/constants.py +0 -6
  952. synth_ai/v0/lm/caching/dbs.py +0 -0
  953. synth_ai/v0/lm/caching/ephemeral.py +0 -100
  954. synth_ai/v0/lm/caching/handler.py +0 -137
  955. synth_ai/v0/lm/caching/initialize.py +0 -11
  956. synth_ai/v0/lm/caching/persistent.py +0 -114
  957. synth_ai/v0/lm/config.py +0 -115
  958. synth_ai/v0/lm/constants.py +0 -32
  959. synth_ai/v0/lm/core/__init__.py +0 -8
  960. synth_ai/v0/lm/core/all.py +0 -73
  961. synth_ai/v0/lm/core/exceptions.py +0 -5
  962. synth_ai/v0/lm/core/main.py +0 -331
  963. synth_ai/v0/lm/core/main_v3.py +0 -594
  964. synth_ai/v0/lm/core/synth_models.py +0 -35
  965. synth_ai/v0/lm/core/vendor_clients.py +0 -190
  966. synth_ai/v0/lm/cost/__init__.py +0 -0
  967. synth_ai/v0/lm/cost/monitor.py +0 -1
  968. synth_ai/v0/lm/cost/statefulness.py +0 -1
  969. synth_ai/v0/lm/injection.py +0 -80
  970. synth_ai/v0/lm/overrides.py +0 -206
  971. synth_ai/v0/lm/provider_support/__init__.py +0 -8
  972. synth_ai/v0/lm/provider_support/anthropic.py +0 -972
  973. synth_ai/v0/lm/provider_support/openai.py +0 -1139
  974. synth_ai/v0/lm/provider_support/suppress_logging.py +0 -31
  975. synth_ai/v0/lm/structured_outputs/__init__.py +0 -0
  976. synth_ai/v0/lm/structured_outputs/handler.py +0 -440
  977. synth_ai/v0/lm/structured_outputs/inject.py +0 -297
  978. synth_ai/v0/lm/structured_outputs/rehabilitate.py +0 -185
  979. synth_ai/v0/lm/tools/__init__.py +0 -3
  980. synth_ai/v0/lm/tools/base.py +0 -172
  981. synth_ai/v0/lm/unified_interface.py +0 -202
  982. synth_ai/v0/lm/vendors/__init__.py +0 -0
  983. synth_ai/v0/lm/vendors/base.py +0 -81
  984. synth_ai/v0/lm/vendors/core/__init__.py +0 -0
  985. synth_ai/v0/lm/vendors/core/anthropic_api.py +0 -387
  986. synth_ai/v0/lm/vendors/core/gemini_api.py +0 -292
  987. synth_ai/v0/lm/vendors/core/mistral_api.py +0 -322
  988. synth_ai/v0/lm/vendors/core/openai_api.py +0 -227
  989. synth_ai/v0/lm/vendors/core/synth_dev_api.py +0 -0
  990. synth_ai/v0/lm/vendors/local/__init__.py +0 -0
  991. synth_ai/v0/lm/vendors/local/ollama.py +0 -0
  992. synth_ai/v0/lm/vendors/openai_standard.py +0 -782
  993. synth_ai/v0/lm/vendors/openai_standard_responses.py +0 -259
  994. synth_ai/v0/lm/vendors/retries.py +0 -22
  995. synth_ai/v0/lm/vendors/supported/__init__.py +0 -0
  996. synth_ai/v0/lm/vendors/supported/custom_endpoint.py +0 -415
  997. synth_ai/v0/lm/vendors/supported/deepseek.py +0 -69
  998. synth_ai/v0/lm/vendors/supported/grok.py +0 -75
  999. synth_ai/v0/lm/vendors/supported/groq.py +0 -16
  1000. synth_ai/v0/lm/vendors/supported/ollama.py +0 -15
  1001. synth_ai/v0/lm/vendors/supported/openrouter.py +0 -74
  1002. synth_ai/v0/lm/vendors/supported/together.py +0 -11
  1003. synth_ai/v0/lm/vendors/synth_client.py +0 -835
  1004. synth_ai/v0/lm/warmup.py +0 -186
  1005. synth_ai/v0/tracing/__init__.py +0 -0
  1006. synth_ai/v0/tracing/abstractions.py +0 -224
  1007. synth_ai/v0/tracing/base_client.py +0 -91
  1008. synth_ai/v0/tracing/client_manager.py +0 -131
  1009. synth_ai/v0/tracing/config.py +0 -142
  1010. synth_ai/v0/tracing/context.py +0 -146
  1011. synth_ai/v0/tracing/decorators.py +0 -682
  1012. synth_ai/v0/tracing/events/__init__.py +0 -0
  1013. synth_ai/v0/tracing/events/manage.py +0 -147
  1014. synth_ai/v0/tracing/events/scope.py +0 -86
  1015. synth_ai/v0/tracing/events/store.py +0 -228
  1016. synth_ai/v0/tracing/immediate_client.py +0 -151
  1017. synth_ai/v0/tracing/local.py +0 -18
  1018. synth_ai/v0/tracing/log_client_base.py +0 -73
  1019. synth_ai/v0/tracing/retry_queue.py +0 -186
  1020. synth_ai/v0/tracing/trackers.py +0 -515
  1021. synth_ai/v0/tracing/upload.py +0 -409
  1022. synth_ai/v0/tracing/utils.py +0 -9
  1023. synth_ai/v0/tracing_v1/__init__.py +0 -16
  1024. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  1025. synth_ai/v0/tracing_v1/base_client.py +0 -91
  1026. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  1027. synth_ai/v0/tracing_v1/config.py +0 -142
  1028. synth_ai/v0/tracing_v1/context.py +0 -146
  1029. synth_ai/v0/tracing_v1/decorators.py +0 -703
  1030. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  1031. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  1032. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  1033. synth_ai/v0/tracing_v1/events/store.py +0 -228
  1034. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  1035. synth_ai/v0/tracing_v1/local.py +0 -18
  1036. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  1037. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  1038. synth_ai/v0/tracing_v1/trackers.py +0 -515
  1039. synth_ai/v0/tracing_v1/upload.py +0 -527
  1040. synth_ai/v0/tracing_v1/utils.py +0 -9
  1041. synth_ai/v0/tracing_v3/__init__.py +0 -10
  1042. synth_ai/v0/tracing_v3/abstractions.py +0 -3
  1043. synth_ai/v0/tracing_v3/decorators.py +0 -3
  1044. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +0 -3
  1045. synth_ai/v0/tracing_v3/session_tracer.py +0 -3
  1046. synth_ai-0.2.14.dist-info/METADATA +0 -139
  1047. synth_ai-0.2.14.dist-info/RECORD +0 -762
  1048. synth_ai-0.2.14.dist-info/top_level.txt +0 -2
  1049. /synth_ai/{demos/demo_task_apps → cli/demo_apps}/crafter/__init__.py +0 -0
  1050. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/__init__.py +0 -0
  1051. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/crafter_fft_4b.toml +0 -0
  1052. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +0 -0
  1053. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
  1054. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/_common.py +0 -0
  1055. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +0 -0
  1056. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/config.toml +0 -0
  1057. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +0 -0
  1058. {examples/task_apps → synth_ai/core/apps}/__init__.py +0 -0
  1059. /synth_ai/{tracing_v3 → core/tracing_v3}/examples/basic_usage.py +0 -0
  1060. /synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +0 -0
  1061. /synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
  1062. /synth_ai/{tracing_v3 → core/tracing_v3}/replica_sync.py +0 -0
  1063. /synth_ai/{tracing_v3 → core/tracing_v3}/serialization.py +0 -0
  1064. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
  1065. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
  1066. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
  1067. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/utils.py +0 -0
  1068. /synth_ai/{tracing_v3 → core/tracing_v3}/turso/__init__.py +0 -0
  1069. /synth_ai/{evals → sdk/judging}/types.py +0 -0
  1070. /synth_ai/{learning → sdk/learning}/algorithms.py +0 -0
  1071. /synth_ai/{learning → sdk/learning}/config.py +0 -0
  1072. /synth_ai/{learning → sdk/learning}/constants.py +0 -0
  1073. /synth_ai/{learning → sdk/learning}/core.py +0 -0
  1074. /synth_ai/{learning → sdk/learning}/gateway.py +0 -0
  1075. /synth_ai/{learning → sdk/learning}/rl/__init__.py +0 -0
  1076. /synth_ai/{learning → sdk/learning}/rl/config.py +0 -0
  1077. /synth_ai/{learning → sdk/learning}/rl_client.py +0 -0
  1078. /synth_ai/{learning → sdk/learning}/sft/__init__.py +0 -0
  1079. /synth_ai/{learning → sdk/learning}/sse.py +0 -0
  1080. /synth_ai/{task → sdk/task}/auth.py +0 -0
  1081. /synth_ai/{task → sdk/task}/client.py +0 -0
  1082. /synth_ai/{task → sdk/task}/errors.py +0 -0
  1083. /synth_ai/{task → sdk/task}/health.py +0 -0
  1084. /synth_ai/{task → sdk/task}/json.py +0 -0
  1085. /synth_ai/{task → sdk/task}/rubrics/models.py +0 -0
  1086. /synth_ai/{task → sdk/task}/rubrics/scoring.py +0 -0
  1087. /synth_ai/{task → sdk/task}/rubrics/strict.py +0 -0
  1088. /synth_ai/{task → sdk/task}/vendors.py +0 -0
  1089. {synth_ai-0.2.14.dist-info → synth_ai-0.4.1.dist-info}/WHEEL +0 -0
  1090. {synth_ai-0.2.14.dist-info → synth_ai-0.4.1.dist-info}/entry_points.txt +0 -0
  1091. {synth_ai-0.2.14.dist-info → synth_ai-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1984 @@
1
+ """Celery task definitions for running experiment jobs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ import os
7
+ import re
8
+ import shlex
9
+ import subprocess
10
+ import sys
11
+ import threading
12
+ import time
13
+ from datetime import UTC, datetime
14
+ from pathlib import Path
15
+ from typing import Any
16
+
17
+ from celery.utils.log import get_task_logger
18
+ from dotenv import load_dotenv
19
+
20
+ from .api_schemas import BackendEventsResponse
21
+ from .celery_app import celery_app
22
+ from .config import load_config
23
+ from .config_utils import PreparedConfig, prepare_config_file
24
+ from .database import session_scope
25
+ from .dispatcher import dispatch_available_jobs
26
+ from .models import (
27
+ Experiment,
28
+ ExperimentJob,
29
+ ExperimentJobStatus,
30
+ ExperimentStatus,
31
+ JobExecutionLog,
32
+ )
33
+ from .results import ResultSummary, collect_result_summary
34
+ from .status import ExperimentStatusTracker
35
+ from .status_tracker import extract_config_info, update_status_from_output
36
+ from .trace_storage import persist_trials_from_summary, update_experiment_metadata
37
+
38
+ logger = get_task_logger(__name__)
39
+
40
+
41
+ TRAIN_COMMAND_ENV = "EXPERIMENT_QUEUE_TRAIN_CMD"
42
+
43
+
44
+ def _load_synth_api_key() -> str:
45
+ """Load SYNTH_API_KEY from .env file and fail loudly if not found.
46
+
47
+ Never falls back to other sources - must be explicitly set in .env file.
48
+
49
+ Returns:
50
+ The API key as a string.
51
+
52
+ Raises:
53
+ RuntimeError: If SYNTH_API_KEY is not found in .env file.
54
+ """
55
+ # Find .env file - check synth-ai root first, then current directory
56
+ repo_root = Path(__file__).resolve().parents[3] # synth_ai/experiment_queue/tasks.py -> synth-ai/
57
+ env_file = repo_root / ".env"
58
+
59
+ if not env_file.exists():
60
+ # Try current directory as fallback
61
+ env_file = Path(".env")
62
+
63
+ if env_file.exists():
64
+ load_dotenv(env_file, override=False) # Don't override existing env vars
65
+
66
+ api_key = os.getenv("SYNTH_API_KEY")
67
+
68
+ if not api_key:
69
+ raise RuntimeError(
70
+ f"❌ SYNTH_API_KEY not found! "
71
+ f"Please set it in {env_file.resolve() if env_file.exists() else 'synth-ai/.env'}. "
72
+ f"No fallback - API key must be explicitly set."
73
+ )
74
+
75
+ return api_key
76
+
77
+
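As a minimal sketch of the contract this helper enforces (the key value and paths below are hypothetical, not taken from the package):

    from pathlib import Path

    # A one-line .env in the working directory is enough for the fallback lookup.
    Path(".env").write_text("SYNTH_API_KEY=sk-example-not-a-real-key\n")
    try:
        api_key = _load_synth_api_key()
        print("key loaded, prefix:", api_key[:6])
    except RuntimeError as exc:
        # Raised when neither the repo-root .env nor ./.env define SYNTH_API_KEY.
        print("configuration error:", exc)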
78
+ def _find_venv_python() -> str:
79
+ """Find the venv Python executable to avoid uv cache permission issues.
80
+
81
+ Checks in order:
82
+ 1. sys.executable if already in a venv
83
+ 2. .venv/bin/python relative to current working directory
84
+ 3. .venv/bin/python relative to repo root (if synth_ai package is installed)
85
+ 4. Falls back to 'python' if venv not found
86
+ """
87
+ # If we're already running in a venv, use that
88
+ if sys.executable and ("venv" in sys.executable or ".venv" in sys.executable):
89
+ return sys.executable
90
+
91
+ # Check .venv/bin/python relative to current working directory
92
+ cwd_venv = Path.cwd() / ".venv" / "bin" / "python"
93
+ if cwd_venv.exists() and os.access(cwd_venv, os.X_OK):
94
+ return str(cwd_venv)
95
+
96
+ # Check .venv/bin/python relative to synth_ai package location
97
+ try:
98
+ import synth_ai
99
+
100
+ package_path = Path(synth_ai.__file__ or Path(__file__).resolve()).parent.parent.parent
101
+ pkg_venv = package_path / ".venv" / "bin" / "python"
102
+ if pkg_venv.exists() and os.access(pkg_venv, os.X_OK):
103
+ return str(pkg_venv)
104
+ except Exception:
105
+ pass
106
+
107
+ # Fallback to system python
108
+ return "python"
109
+
110
+
111
+ def _get_default_train_cmd() -> str:
112
+ """Get the default training command, evaluating venv path lazily.
113
+
114
+ This is called when building the command, not at module import time,
115
+ so it can properly detect the venv based on the current working directory.
116
+ """
117
+ return f"{_find_venv_python()} -m synth_ai.cli train"
118
+
119
+
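A small sketch of how the command string is resolved in practice (the paths and the override value are illustrative, not taken from the package):

    import os

    os.environ.pop("EXPERIMENT_QUEUE_TRAIN_CMD", None)
    print(_get_default_train_cmd())
    # -> "/repo/.venv/bin/python -m synth_ai.cli train" when a venv interpreter is found,
    #    otherwise "python -m synth_ai.cli train"

    # Hypothetical override; _build_train_command() further below starts from this
    # string instead of the default whenever the variable is set.
    os.environ["EXPERIMENT_QUEUE_TRAIN_CMD"] = "/usr/bin/python3 -m synth_ai.cli train"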
120
+ def _extract_backend_job_id(output: str) -> str | None:
121
+ """Extract backend job ID from subprocess output.
122
+
123
+ Looks for patterns like:
124
+ - JSON: "job_id": "pl_xxxxx"
125
+ - Pattern: pl_[a-f0-9]+
126
+
127
+ Args:
128
+ output: Subprocess stdout/stderr output
129
+
130
+ Returns:
131
+ Backend job ID if found, None otherwise
132
+
133
+ Raises:
134
+ AssertionError: If extracted ID doesn't match expected format
135
+ """
136
+ if not output:
137
+ return None
138
+
139
+ # Assert output is a string
140
+ assert isinstance(output, str), f"Expected str, got {type(output).__name__}"
141
+
142
+ # Look for job_id in JSON response
143
+ match = re.search(r'"job_id"\s*:\s*"([^"]+)"', output)
144
+ if match:
145
+ job_id = match.group(1)
146
+ # Validate format
147
+ assert job_id.startswith("pl_"), f"Extracted job_id doesn't match expected format 'pl_*': {job_id}"
148
+ assert len(job_id) > 3, f"Extracted job_id too short: {job_id}"
149
+ return job_id
150
+
151
+ # Try pattern pl_xxxxx
152
+ match = re.search(r'pl_[a-f0-9]+', output)
153
+ if match:
154
+ job_id = match.group(0)
155
+ # Validate format
156
+ assert job_id.startswith("pl_"), f"Extracted job_id doesn't match expected format 'pl_*': {job_id}"
157
+ assert len(job_id) > 3, f"Extracted job_id too short: {job_id}"
158
+ return job_id
159
+
160
+ return None
161
+
162
+
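A quick sanity check of the two extraction paths handled above, as a sketch (the job IDs are made up):

    # Both the JSON "job_id" field and a bare pl_* token are recognized.
    assert _extract_backend_job_id('{"job_id": "pl_0a1b2c"}') == "pl_0a1b2c"
    assert _extract_backend_job_id("submitted backend job pl_deadbeef, polling...") == "pl_deadbeef"
    assert _extract_backend_job_id("no job id in this output") is None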
163
+ def _poll_backend_progress(
164
+ backend_job_id: str,
165
+ status_tracker: ExperimentStatusTracker,
166
+ policy: str | None,
167
+ environment: str | None,
168
+ backend_url: str,
169
+ api_key: str,
170
+ stop_event: threading.Event,
171
+ job_start_time: float | None = None,
172
+ ) -> None:
173
+ """Poll backend API for progress events and update status_json.
174
+
175
+ Polls the backend API endpoint `/prompt-learning/online/jobs/{backend_job_id}/events`
176
+ every 5 seconds to fetch `prompt.learning.progress` events containing rollouts,
177
+ ETA, and best score information. Updates the experiment status_json in real-time.
178
+
179
+ Backend URL Configuration:
180
+ - Default: Production (https://api.usesynth.ai/api)
181
+ - Local: Set EXPERIMENT_QUEUE_LOCAL=true or use --local flag (http://localhost:8000/api)
182
+ - Custom: Set EXPERIMENT_QUEUE_BACKEND_URL env var
183
+
184
+ Args:
185
+ backend_job_id: Backend job ID to poll (e.g., "pl_xxxxx")
186
+ status_tracker: ExperimentStatusTracker instance for updating status_json
187
+ policy: Policy model name (e.g., "gpt-4", "llama-3.1-8b-instant")
188
+ environment: Environment name (e.g., "heartdisease", "hotpotqa")
189
+ backend_url: Backend API base URL (from config.backend_url)
190
+ api_key: API key for authentication (from SYNTH_API_KEY env var)
191
+        stop_event: Threading event to signal when to stop polling
+        job_start_time: Optional wall-clock start time (time.time()) of the job;
+            used as a fallback when estimating rollouts per minute
192
+ """
193
+ import logging
194
+ import os
195
+
196
+ import requests
197
+
198
+ # Import BackendJobEvent locally to ensure it's available in this function's scope
199
+ from .api_schemas import BackendJobEvent # noqa: F811
200
+
201
+ # Get logger for this thread (logger from parent thread may not work correctly)
202
+ poller_logger = logging.getLogger(f"synth_ai.cli.local.experiment_queue.poller.{backend_job_id}")
203
+
204
+ # Set log level from environment variable if set (allows --loglevel flag to control verbosity)
205
+ # Use Celery's logger hierarchy instead of creating our own handler to avoid duplicates
206
+ log_level_env = os.getenv("EXPERIMENT_QUEUE_LOG_LEVEL", "INFO").upper()
207
+ try:
208
+ log_level = getattr(logging, log_level_env)
209
+ poller_logger.setLevel(log_level)
210
+ # Don't create handlers - let Celery's logging handle it
211
+ # Just propagate to parent logger (Celery's task logger)
212
+ poller_logger.propagate = True
213
+ except (AttributeError, ValueError):
214
+ # Invalid log level, use default
215
+ pass
216
+
217
+ # Validate inputs with assertions
218
+ assert backend_job_id, "backend_job_id cannot be empty"
219
+ assert backend_job_id.startswith("pl_"), f"Invalid backend_job_id format: expected 'pl_*', got '{backend_job_id}'"
220
+ assert backend_url, "backend_url cannot be empty"
221
+ assert backend_url.startswith(("http://", "https://")), f"Invalid backend_url format: {backend_url}"
222
+ assert api_key, "api_key cannot be empty"
223
+ assert status_tracker is not None, "status_tracker cannot be None"
224
+ assert stop_event is not None, "stop_event cannot be None"
225
+
226
+ url = f"{backend_url.rstrip('/')}/prompt-learning/online/jobs/{backend_job_id}/events"
227
+ headers = {"Authorization": f"Bearer {api_key}"}
228
+ last_seq = 0
229
+ progress_start_time: float | None = None # Track when we first see progress
230
+ consecutive_timeouts = 0 # Track consecutive timeouts for exponential backoff
231
+ base_poll_interval = 5.0 # Base polling interval in seconds
232
+
233
+ # ✅ ADD: Track last progress update time to detect stuck jobs
234
+ last_progress_time: float | None = None
235
+ last_rollouts_completed: int | None = None
236
+ last_progress_seq = 0
237
+ stuck_threshold_seconds = 600.0 # 10 minutes without progress = stuck
238
+
239
+ poller_logger.info("📡 Starting progress poller for backend job %s (URL: %s)", backend_job_id, url)
240
+
241
+ while not stop_event.is_set():
242
+ events_received = 0
243
+ try:
244
+ # Assert URL is valid before making request
245
+ assert url.startswith(("http://", "https://")), f"Invalid URL format: {url}"
246
+
247
+ poller_logger.info("Polling backend API: %s (since_seq: %d)", url, last_seq)
248
+
249
+ try:
250
+ resp = requests.get(
251
+ url,
252
+ headers=headers,
253
+ params={"since_seq": last_seq, "limit": 100},
254
+ timeout=120, # Increased to 120s to handle slow backend/PostgREST responses
255
+ )
256
+ except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
257
+ # ✅ ADD: Detect connection pool exhaustion in poller
258
+ error_str = str(e).lower()
259
+ is_pool_exhausted = (
260
+ "connection" in error_str
261
+ or "timeout" in error_str
262
+ or "refused" in error_str
263
+ )
264
+ if is_pool_exhausted:
265
+ # 🔥 VERY LOUD ERROR MESSAGES FOR CONNECTION POOL ISSUES IN POLLER
266
+ print("=" * 100, flush=True)
267
+ print("🔥🔥🔥 CONNECTION POOL EXHAUSTION DETECTED (POLLER) 🔥🔥🔥", flush=True)
268
+ print("=" * 100, flush=True)
269
+ print(f"Backend Job ID: {backend_job_id}", flush=True)
270
+ print(f"URL: {url}", flush=True)
271
+ print(f"Error: {type(e).__name__}: {str(e)}", flush=True)
272
+ print("=" * 100, flush=True)
273
+ print("⚠️ Cannot fetch events - connection pool may be exhausted!", flush=True)
274
+ print("⚠️ Check DB_POOL_SIZE and DB_MAX_OVERFLOW environment variables", flush=True)
275
+ print("=" * 100, flush=True)
276
+
277
+ poller_logger.error("=" * 100)
278
+ poller_logger.error("🔥🔥🔥 CONNECTION POOL EXHAUSTION DETECTED (POLLER) 🔥🔥🔥")
279
+ poller_logger.error("=" * 100)
280
+ poller_logger.error("Backend Job ID: %s | URL: %s", backend_job_id, url)
281
+ poller_logger.error("Error: %s: %s", type(e).__name__, str(e))
282
+ poller_logger.error("⚠️ Cannot fetch events - connection pool may be exhausted!")
283
+ poller_logger.error("⚠️ Check DB_POOL_SIZE and DB_MAX_OVERFLOW environment variables")
284
+ poller_logger.error("=" * 100)
285
+ raise
286
+
287
+ # Assert we got a response object
288
+ assert resp is not None, "requests.get() returned None"
289
+
290
+ poller_logger.info("API response: status=%d, content_length=%d", resp.status_code, len(resp.content))
291
+
292
+ # ✅ ADD: Detect connection pool exhaustion in HTTP error responses
293
+ if resp.status_code not in (200, 201):
294
+ body_text = (resp.text or "")[:500].lower()
295
+ is_pool_exhausted = (
296
+ resp.status_code == 503 # Service Unavailable
297
+ or resp.status_code == 429 # Too Many Requests (after long wait)
298
+ or "connection pool" in body_text
299
+ or "too many clients" in body_text
300
+ or "maxclients" in body_text
301
+ or "max clients" in body_text
302
+ or "connection refused" in body_text
303
+ )
304
+
305
+ if is_pool_exhausted:
306
+ # 🔥 VERY LOUD ERROR MESSAGES FOR CONNECTION POOL ISSUES IN POLLER
307
+ print("=" * 100, flush=True)
308
+ print("🔥🔥🔥 CONNECTION POOL EXHAUSTION DETECTED (POLLER HTTP ERROR) 🔥🔥🔥", flush=True)
309
+ print("=" * 100, flush=True)
310
+ print(f"Backend Job ID: {backend_job_id}", flush=True)
311
+ print(f"URL: {url}", flush=True)
312
+ print(f"HTTP Status: {resp.status_code}", flush=True)
313
+ print(f"Response Body: {resp.text[:500]}", flush=True)
314
+ print("=" * 100, flush=True)
315
+ print("⚠️ Cannot fetch events - connection pool may be exhausted!", flush=True)
316
+ print("⚠️ Check DB_POOL_SIZE and DB_MAX_OVERFLOW environment variables", flush=True)
317
+ print("=" * 100, flush=True)
318
+
319
+ poller_logger.error("=" * 100)
320
+ poller_logger.error("🔥🔥🔥 CONNECTION POOL EXHAUSTION DETECTED (POLLER HTTP ERROR) 🔥🔥🔥")
321
+ poller_logger.error("=" * 100)
322
+ poller_logger.error("Backend Job ID: %s | URL: %s | HTTP: %d", backend_job_id, url, resp.status_code)
323
+ poller_logger.error("Response Body: %s", resp.text[:500])
324
+ poller_logger.error("⚠️ Cannot fetch events - connection pool may be exhausted!")
325
+ poller_logger.error("⚠️ Check DB_POOL_SIZE and DB_MAX_OVERFLOW environment variables")
326
+ poller_logger.error("=" * 100)
327
+
328
+ if resp.status_code == 200:
329
+ # Parse and validate API response using Pydantic models
330
+ try:
331
+ raw_data = resp.json()
332
+ # Assert response is not None
333
+ assert raw_data is not None, "API returned None response"
334
+
335
+ # Parse response with validation
336
+ assert isinstance(raw_data, dict | list), (
337
+ f"API response must be dict or list, got {type(raw_data).__name__}: {raw_data}"
338
+ )
339
+
340
+ events_response = BackendEventsResponse.parse_response(raw_data)
341
+ assert isinstance(events_response, BackendEventsResponse), (
342
+ f"parse_response returned wrong type: {type(events_response).__name__}"
343
+ )
344
+ assert isinstance(events_response.events, list), (
345
+ f"events_response.events must be list, got {type(events_response.events).__name__}"
346
+ )
347
+
348
+ events_received = len(events_response.events)
349
+ assert events_received >= 0, (
350
+ f"events_received must be >= 0, got {events_received}"
351
+ )
352
+
353
+ # Process each event
354
+ event_types_seen: dict[str, int] = {}
355
+ for idx, event in enumerate(events_response.events):
356
+ # Assert event is BackendJobEvent instance
357
+ assert isinstance(event, BackendJobEvent), (
358
+ f"Event at index {idx} must be BackendJobEvent, got {type(event).__name__}"
359
+ )
360
+ # Assert event has required fields
361
+ assert event.seq >= 0, f"Invalid seq: {event.seq}"
362
+ assert event.type, f"Event missing type field: {event}"
363
+ assert event.message, f"Event missing message field: {event}"
364
+
365
+ # Track event types for debugging
366
+ event_types_seen[event.type] = event_types_seen.get(event.type, 0) + 1
367
+
368
+ # Check if this is a progress event
369
+ if event.type == "prompt.learning.progress":
370
+ poller_logger.info(
371
+ "Found progress event seq=%d: %s",
372
+ event.seq,
373
+ event.message[:100],
374
+ )
375
+ # Extract progress data with validation
376
+ progress_data = event.get_progress_data()
377
+ if progress_data is None:
378
+ poller_logger.warning(
379
+ "Progress event seq=%d has no parseable data. Event data: %s",
380
+ event.seq,
381
+ event.data,
382
+ )
383
+ continue
384
+
385
+ poller_logger.debug(
386
+ "Progress event seq=%d data: rollouts_completed=%s, rollouts_total=%s, best_score=%s, eta=%s",
387
+ event.seq,
388
+ progress_data.rollouts_completed,
389
+ progress_data.effective_rollouts_total,
390
+ progress_data.effective_best_score,
391
+ progress_data.eta_seconds,
392
+ )
393
+
394
+ # Use effective getters that handle field name variations
395
+ rollouts_completed = progress_data.rollouts_completed
396
+ rollouts_total = progress_data.effective_rollouts_total
397
+ eta_seconds = progress_data.eta_seconds
398
+ # percent_rollouts from backend is 0-1, convert to 0-100 for display
399
+ progress_pct = None
400
+ if progress_data.percent_rollouts is not None:
401
+ progress_pct = progress_data.percent_rollouts * 100.0
402
+ elif progress_data.percent_overall is not None:
403
+ # Fallback to percent_overall if percent_rollouts not available
404
+ progress_pct = progress_data.percent_overall * 100.0
405
+ best_score = progress_data.effective_best_score
406
+
407
+ # Track when we first see progress (for rollouts/min calculation)
408
+ if rollouts_completed is not None and rollouts_completed > 0 and progress_start_time is None:
409
+ progress_start_time = time.time()
410
+
411
+ # Calculate rollouts/min if we have progress and timing info
412
+ rollouts_per_minute = None
413
+ if rollouts_completed is not None and rollouts_completed > 0:
414
+ # Use progress_start_time if available, otherwise fall back to job_start_time
415
+ start_time_for_rate = progress_start_time or job_start_time
416
+ if start_time_for_rate is not None:
417
+ elapsed = time.time() - start_time_for_rate
418
+ if elapsed > 0:
419
+ rate_per_second = rollouts_completed / elapsed
420
+ rollouts_per_minute = rate_per_second * 60.0
421
+
422
+ # Assert data types and ranges
423
+ if rollouts_completed is not None:
424
+ assert isinstance(rollouts_completed, int), (
425
+ f"rollouts_completed must be int, got {type(rollouts_completed).__name__}: {rollouts_completed}"
426
+ )
427
+ assert rollouts_completed >= 0, (
428
+ f"rollouts_completed must be >= 0, got {rollouts_completed}"
429
+ )
430
+
431
+ if rollouts_total is not None:
432
+ assert isinstance(rollouts_total, int), (
433
+ f"rollouts_total must be int, got {type(rollouts_total).__name__}: {rollouts_total}"
434
+ )
435
+ assert rollouts_total > 0, (
436
+ f"rollouts_total must be > 0, got {rollouts_total}"
437
+ )
438
+
439
+ if eta_seconds is not None:
440
+ assert isinstance(eta_seconds, int | float), (
441
+ f"eta_seconds must be int | float, got {type(eta_seconds).__name__}: {eta_seconds}"
442
+ )
443
+ assert eta_seconds >= 0, (
444
+ f"eta_seconds must be >= 0, got {eta_seconds}"
445
+ )
446
+
447
+ if best_score is not None:
448
+ assert isinstance(best_score, int | float), (
449
+ f"best_score must be int | float, got {type(best_score).__name__}: {best_score}"
450
+ )
451
+ assert 0 <= best_score <= 1, (
452
+ f"best_score must be in [0, 1], got {best_score}"
453
+ )
454
+
455
+ if progress_pct is not None:
456
+ assert isinstance(progress_pct, int | float), (
457
+ f"progress_pct must be int | float, got {type(progress_pct).__name__}: {progress_pct}"
458
+ )
459
+ assert 0 <= progress_pct <= 100, (
460
+ f"progress_pct must be in [0, 100], got {progress_pct}"
461
+ )
462
+
463
+ # Assert consistency: rollouts_completed <= rollouts_total
464
+ if rollouts_completed is not None and rollouts_total is not None:
465
+ assert rollouts_completed <= rollouts_total, (
466
+ f"rollouts_completed ({rollouts_completed}) > rollouts_total ({rollouts_total})"
467
+ )
468
+
469
+ # Assert we have meaningful progress data
470
+ has_progress = (
471
+ rollouts_completed is not None
472
+ or best_score is not None
473
+ or rollouts_total is not None
474
+ )
475
+
476
+ # ✅ Initialize custom_fields before use (extract from event data for validation phase tracking)
477
+ custom_fields: dict[str, Any] = {}
478
+ if event.data and isinstance(event.data, dict):
479
+ # Extract phase and validation info if present
480
+ phase = event.data.get("phase")
481
+ if phase == "validation":
482
+ custom_fields["phase"] = "validation"
483
+ if "validation_candidate" in event.data:
484
+ custom_fields["validation_candidate"] = event.data["validation_candidate"]
485
+ if "validation_total" in event.data:
486
+ custom_fields["validation_total"] = event.data["validation_total"]
487
+
488
+ if has_progress:
489
+ # Validate status_tracker before update
490
+ assert status_tracker is not None, "status_tracker is None"
491
+ assert hasattr(status_tracker, "update"), "status_tracker missing update method"
492
+ assert hasattr(status_tracker, "job_id"), "status_tracker missing job_id"
493
+
494
+ status_tracker.update(
495
+ policy=policy,
496
+ environment=environment,
497
+ rollouts_completed=rollouts_completed,
498
+ total_rollouts=rollouts_total,
499
+ eta_seconds=eta_seconds,
500
+ progress_pct=progress_pct,
501
+ best_score=best_score,
502
+ rollouts_per_minute=rollouts_per_minute,
503
+ custom_fields=custom_fields if custom_fields else None,
504
+ )
505
+
506
+ # ✅ ADD: Track progress for stuck detection
507
+ import time as _time_module
508
+ current_time = _time_module.time()
509
+ if rollouts_completed is not None:
510
+ if last_rollouts_completed is None or rollouts_completed != last_rollouts_completed:
511
+ # Progress changed - update tracking
512
+ last_progress_time = current_time
513
+ last_rollouts_completed = rollouts_completed
514
+ last_progress_seq = event.seq
515
+ poller_logger.info(
516
+ "📊 Progress update for job %s: %s/%s rollouts, ETA: %s, Best: %s",
517
+ backend_job_id,
518
+ rollouts_completed,
519
+ rollouts_total,
520
+ eta_seconds,
521
+ best_score,
522
+ )
523
+ elif last_progress_time is not None:
524
+ # Check if stuck (no progress for threshold time)
525
+ time_since_progress = current_time - last_progress_time
526
+ if time_since_progress >= stuck_threshold_seconds:
527
+ poller_logger.warning(
528
+ "⚠️ Job %s appears STUCK: No progress for %.1f minutes (last: %s/%s rollouts at seq %d)",
529
+ backend_job_id,
530
+ time_since_progress / 60.0,
531
+ last_rollouts_completed,
532
+ rollouts_total,
533
+ last_progress_seq,
534
+ )
535
+ # Emit warning event
536
+ with contextlib.suppress(Exception):
537
+ status_tracker.update(
538
+ custom_fields={
539
+ **(custom_fields or {}),
540
+ "stuck_warning": True,
541
+ "time_since_progress_seconds": time_since_progress,
542
+ }
543
+ )
544
+ else:
545
+ # No rollouts info - log anyway
546
+ poller_logger.info(
547
+ "📊 Progress update for job %s: %s/%s rollouts, ETA: %s, Best: %s",
548
+ backend_job_id,
549
+ rollouts_completed,
550
+ rollouts_total,
551
+ eta_seconds,
552
+ best_score,
553
+ )
554
+
555
+ # Update last_seq (always update, even if no progress data)
556
+ last_seq = max(last_seq, event.seq)
557
+ else:
558
+ # Non-progress event - just update seq
559
+ last_seq = max(last_seq, event.seq)
560
+
561
+ # ✅ ADD: Track consecutive polls with no new events
562
+ if events_received == 0:
563
+ # Increment counter for no-event polls
564
+ if not hasattr(_poll_backend_progress, '_no_event_polls'):
565
+ _poll_backend_progress._no_event_polls = {} # type: ignore[attr-defined]
566
+ if backend_job_id not in _poll_backend_progress._no_event_polls: # type: ignore[attr-defined]
567
+ _poll_backend_progress._no_event_polls[backend_job_id] = 0 # type: ignore[attr-defined]
568
+ _poll_backend_progress._no_event_polls[backend_job_id] += 1 # type: ignore[attr-defined]
569
+ no_event_count = _poll_backend_progress._no_event_polls[backend_job_id] # type: ignore[attr-defined]
570
+
571
+ # Warn if we've had many consecutive polls with no events
572
+ if no_event_count >= 12: # 12 polls * 5s = 60s with no events
573
+ poller_logger.warning(
574
+ "⚠️ Job %s: No new events for %d consecutive polls (~%ds). Last seq: %d. Job may be stuck.",
575
+ backend_job_id,
576
+ no_event_count,
577
+ no_event_count * int(base_poll_interval),
578
+ last_seq,
579
+ )
580
+ # Emit warning in status_json
581
+ with contextlib.suppress(Exception):
582
+ status_tracker.update(
583
+ custom_fields={
584
+ "no_event_polls": no_event_count,
585
+ "last_event_seq": last_seq,
586
+ "stuck_warning": True,
587
+ }
588
+ )
589
+
590
+ poller_logger.info("Progress poller heartbeat for job %s (no new events, last_seq=%d, consecutive_no_events=%d)", backend_job_id, last_seq, no_event_count)
591
+ else:
592
+ # Reset counter when we get events
593
+ if hasattr(_poll_backend_progress, '_no_event_polls') and backend_job_id in _poll_backend_progress._no_event_polls: # type: ignore[attr-defined]
594
+ _poll_backend_progress._no_event_polls[backend_job_id] = 0 # type: ignore[attr-defined]
595
+
596
+ event_types_str = ", ".join(f"{k}:{v}" for k, v in sorted(event_types_seen.items()))
597
+ poller_logger.info(
598
+ "Processed %d events (types: %s), updated last_seq to %d",
599
+ events_received,
600
+ event_types_str,
601
+ last_seq,
602
+ )
603
+ # Log if we're not seeing progress events
604
+ if "prompt.learning.progress" not in event_types_seen:
605
+ poller_logger.debug(
606
+ "No progress events in this batch (last_seq=%d). Event types seen: %s",
607
+ last_seq,
608
+ event_types_str,
609
+ )
610
+
611
+ # Reset timeout counter on successful request
612
+ consecutive_timeouts = 0
613
+
614
+ except AssertionError as e:
615
+ poller_logger.error(
616
+ "❌ Assertion failed while parsing events for job %s: %s. Response: %s",
617
+ backend_job_id,
618
+ e,
619
+ resp.text[:500] if resp else "No response",
620
+ )
621
+ # Continue polling - don't stop on validation errors
622
+ except ValueError as e:
623
+ poller_logger.error(
624
+ "❌ Invalid API response format for job %s: %s. Response: %s",
625
+ backend_job_id,
626
+ e,
627
+ resp.text[:500] if resp else "No response",
628
+ )
629
+ # Continue polling - don't stop on validation errors
630
+ except Exception as e:
631
+ poller_logger.error(
632
+ "❌ Unexpected error parsing events for job %s: %s. Response: %s",
633
+ backend_job_id,
634
+ e,
635
+ resp.text[:500] if resp else "No response",
636
+ exc_info=True,
637
+ )
638
+ # Continue polling - don't stop on parsing errors
639
+ elif resp.status_code == 404:
640
+ # Job not found yet or doesn't exist - stop polling
641
+ poller_logger.warning("Backend job %s not found (404), stopping poller", backend_job_id)
642
+ break
643
+ elif resp.status_code != 200:
644
+ poller_logger.warning(
645
+ "Backend API returned status %d for job %s: %s",
646
+ resp.status_code,
647
+ backend_job_id,
648
+ resp.text[:200],
649
+ )
650
+ except requests.exceptions.ReadTimeout as e:
651
+ # ReadTimeout is expected when backend is slow - log as warning and use exponential backoff
652
+ consecutive_timeouts += 1
653
+ backoff_seconds = min(base_poll_interval * (2 ** min(consecutive_timeouts - 1, 4)), 60.0) # Max 60s backoff
654
+ poller_logger.warning(
655
+ "Backend timeout polling job %s (consecutive=%d, backing off %.1fs): %s",
656
+ backend_job_id,
657
+ consecutive_timeouts,
658
+ backoff_seconds,
659
+ e,
660
+ )
661
+ # Use exponential backoff on timeout
662
+ stop_event.wait(timeout=backoff_seconds)
663
+ continue
664
+ except requests.exceptions.RequestException as e:
665
+ # Other network errors - log as warning, reset timeout counter
666
+ consecutive_timeouts = 0
667
+ poller_logger.warning("Network error polling job %s: %s", backend_job_id, e)
668
+ except Exception as e:
669
+ # Unexpected errors - log as error but don't crash
670
+ consecutive_timeouts = 0
671
+ poller_logger.error("Progress poller error for job %s: %s", backend_job_id, e, exc_info=True)
672
+
673
+ # Poll every 5 seconds (or after backoff)
674
+ stop_event.wait(timeout=base_poll_interval)
675
+
676
+ poller_logger.info("📡 Stopped progress poller for backend job %s", backend_job_id)
677
+
678
+
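A sketch of how this poller is typically wired up next to the training subprocess; the identifiers, model, and environment names are illustrative, and the Celery task further below performs this wiring itself:

    import threading
    import time

    api_key = _load_synth_api_key()
    status_tracker = ExperimentStatusTracker("job_123")      # hypothetical queue job id
    stop_event = threading.Event()
    poller = threading.Thread(
        target=_poll_backend_progress,
        kwargs={
            "backend_job_id": "pl_0a1b2c",                   # parsed from subprocess output
            "status_tracker": status_tracker,
            "policy": "gpt-4o-mini",                         # hypothetical policy model
            "environment": "heartdisease",
            "backend_url": "https://api.usesynth.ai/api",
            "api_key": api_key,
            "stop_event": stop_event,
            "job_start_time": time.time(),
        },
        daemon=True,
    )
    poller.start()
    # ... run the training subprocess to completion ...
    stop_event.set()            # ask the polling loop to exit
    poller.join(timeout=10)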
679
+ def _truncate(text: str, limit: int = 4000) -> str:
680
+ """Truncate text to a maximum length, keeping the end portion.
681
+
682
+ Args:
683
+ text: Text to truncate
684
+ limit: Maximum length in characters (default: 4000)
685
+
686
+ Returns:
687
+ Truncated text (last `limit` characters if text exceeds limit)
688
+ """
689
+ if len(text) <= limit:
690
+ return text
691
+ return text[-limit:]
692
+
693
+
694
+ def _build_train_command(config_path: str) -> list[str]:
695
+ """Build the training command for running a prompt learning job.
696
+
697
+ Constructs a command list suitable for subprocess execution by:
698
+ 1. Getting the base command from EXPERIMENT_QUEUE_TRAIN_CMD env var or default
699
+ 2. Parsing the base command into segments
700
+ 3. Appending prompt learning specific flags (--type, --config, --poll, etc.)
701
+ 4. Adding --backend flag with URL from experiment queue config
702
+
703
+ Args:
704
+ config_path: Path to the TOML config file for the experiment
705
+
706
+ Returns:
707
+ List of command segments ready for subprocess execution
708
+
709
+ Note:
710
+        The base command defaults to the resolved venv Python running `-m synth_ai.cli train` if
711
+ EXPERIMENT_QUEUE_TRAIN_CMD is not set. The command always includes
712
+ --type prompt_learning, --config, --poll, --stream-format cli, and --backend flags.
713
+ """
714
+ # Get command from env var or use default (lazily evaluated)
715
+ base_cmd = os.getenv(TRAIN_COMMAND_ENV)
716
+ if base_cmd:
717
+ logger.debug("Using training command from EXPERIMENT_QUEUE_TRAIN_CMD: %s", base_cmd)
718
+ else:
719
+ base_cmd = _get_default_train_cmd()
720
+ logger.debug("Using default training command: %s", base_cmd)
721
+
722
+ segments: list[str] = []
723
+ for part in shlex.split(base_cmd):
724
+ if part:
725
+ segments.append(part)
726
+
727
+ # Get backend URL from config and add --backend flag
728
+ config = load_config()
729
+ backend_url = config.backend_url
730
+
731
+ segments.extend(
732
+ [
733
+ "--type",
734
+ "prompt_learning",
735
+ "--config",
736
+ config_path,
737
+ "--backend",
738
+ backend_url,
739
+ "--poll",
740
+ "--stream-format",
741
+ "cli",
742
+ ]
743
+ )
744
+ return segments
745
+
746
+
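For a hypothetical config path, and assuming the default base command plus the production backend URL from load_config(), the resulting argv looks roughly like this:

    import shlex

    cmd = _build_train_command("/tmp/experiments/job_123/config.toml")
    # e.g. ["/repo/.venv/bin/python", "-m", "synth_ai.cli", "train",
    #       "--type", "prompt_learning",
    #       "--config", "/tmp/experiments/job_123/config.toml",
    #       "--backend", "https://api.usesynth.ai/api",
    #       "--poll", "--stream-format", "cli"]
    print(shlex.join(cmd))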
747
+ def _mark_job_running(job_id: str, task_id: str | None) -> ExperimentJob | None:
748
+ """Mark a job as running and update its status in the database.
749
+
750
+ Updates the job status to RUNNING, sets the started_at timestamp, and
751
+ optionally associates a Celery task ID. If the parent experiment is
752
+ QUEUED, it is also marked as RUNNING.
753
+
754
+ Args:
755
+ job_id: Job identifier
756
+ task_id: Optional Celery task ID to associate with the job
757
+
758
+ Returns:
759
+ ExperimentJob instance if found, None otherwise
760
+
761
+ Note:
762
+ The job is expunged from the session so it can be safely used outside
763
+ the session scope. The session is committed automatically by session_scope.
764
+ """
765
+ with session_scope() as session:
766
+ job = session.get(ExperimentJob, job_id)
767
+ if not job:
768
+ logger.warning("Job %s missing from database", job_id)
769
+ return None
770
+ job.status = ExperimentJobStatus.RUNNING
771
+ job.started_at = datetime.now(UTC)
772
+ if task_id:
773
+ job.celery_task_id = task_id
774
+ experiment = job.experiment
775
+ if experiment and experiment.status == ExperimentStatus.QUEUED:
776
+ experiment.status = ExperimentStatus.RUNNING
777
+ experiment.started_at = datetime.now(UTC)
778
+ session.flush()
779
+ # Expunge so job can be safely used outside session scope
780
+ session.expunge(job)
781
+ return job
782
+
783
+
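Because the returned ExperimentJob is expunged, callers receive a detached instance; a short sketch of what is and is not safe with it (the IDs are hypothetical):

    job = _mark_job_running("job_123", task_id="celery-task-abc")
    if job is not None:
        print(job.job_id, job.status)   # plain columns were loaded before the expunge
        # Accessing lazy relationships such as job.experiment here would need a new
        # session; on a detached instance it can raise DetachedInstanceError.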
784
+ def _jobs_remaining(session, experiment_id: str) -> int:
785
+ """Count remaining jobs (QUEUED or RUNNING) for an experiment.
786
+
787
+ Args:
788
+ session: SQLAlchemy session
789
+ experiment_id: Experiment identifier
790
+
791
+ Returns:
792
+ Number of jobs that are still QUEUED or RUNNING (not completed/failed)
793
+ """
794
+ return (
795
+ session.query(ExperimentJob)
796
+ .filter(
797
+ ExperimentJob.experiment_id == experiment_id,
798
+ ExperimentJob.status.in_(
799
+ [
800
+ ExperimentJobStatus.QUEUED,
801
+ ExperimentJobStatus.RUNNING,
802
+ ]
803
+ ),
804
+ )
805
+ .count()
806
+ )
807
+
808
+
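The query amounts to a filtered count; a usage sketch follows (the experiment id is hypothetical, and the SQL in the comment assumes table and column names that mirror the ORM models):

    #   SELECT COUNT(*) FROM experiment_jobs
    #    WHERE experiment_id = :experiment_id
    #      AND status IN ('QUEUED', 'RUNNING')
    with session_scope() as session:
        if _jobs_remaining(session, "exp_123") == 0:
            print("every job for exp_123 has reached a terminal state")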
809
+ def _finalize_job(
810
+ job_id: str,
811
+ *,
812
+ summary: ResultSummary,
813
+ success: bool,
814
+ error_message: str | None = None,
815
+ command: str | None = None,
816
+ working_directory: str | None = None,
817
+ python_executable: str | None = None,
818
+ environment_keys: list[str] | None = None,
819
+ ) -> dict[str, Any] | None:
820
+ """Finalize a job by updating its status and persisting results.
821
+
822
+ Updates the job status to COMPLETED or FAILED based on success flag,
823
+ persists trial data if successful, and updates experiment status when
824
+ all jobs are done. If the experiment has remaining jobs, dispatches them.
825
+
826
+ Args:
827
+ job_id: Job identifier
828
+ summary: Result summary containing stdout, stderr, metrics, etc.
829
+ success: Whether the job completed successfully
830
+            error_message: Optional error message if job failed
+            command: Optional command string that was executed (stored in the execution log)
+            working_directory: Optional working directory the command ran in
+            python_executable: Optional path to the Python interpreter used
+            environment_keys: Optional list of environment variable names recorded for the run
831
+
832
+ Returns:
833
+ Summary dictionary if job found, None otherwise
834
+
835
+ Note:
836
+ - If successful: Job status set to COMPLETED, trials persisted
837
+ - If failed: Job status set to FAILED, error message stored
838
+ - Experiment status updated to COMPLETED/FAILED only when all jobs done
839
+ - Remaining jobs are dispatched if experiment still has queued jobs
840
+ """
841
+ with session_scope() as session:
842
+ job = session.get(ExperimentJob, job_id)
843
+ if not job:
844
+ logger.warning("Job %s missing during finalize", job_id)
845
+ return None
846
+
847
+ job.completed_at = datetime.now(UTC)
848
+ experiment = job.experiment
849
+
850
+ # ALWAYS create execution log entry (for both success and failure)
851
+ # This allows querying failures directly from the database
852
+ if command is not None and working_directory is not None:
853
+ from uuid import uuid4
854
+ # For failed jobs, store full stdout/stderr (up to 100k chars each)
855
+ # For successful jobs, truncate to 4k chars to save space
856
+ stdout_for_log = summary.stdout or ""
857
+ stderr_for_log = summary.stderr or ""
858
+ if not success:
859
+ # Keep full output for errors (truncate only if extremely large)
860
+ if len(stdout_for_log) > 100000:
861
+ stdout_for_log = f"{stdout_for_log[:50000]}\n\n... (truncated {len(stdout_for_log) - 100000} chars) ...\n\n{stdout_for_log[-50000:]}"
862
+ if len(stderr_for_log) > 100000:
863
+ stderr_for_log = f"{stderr_for_log[:50000]}\n\n... (truncated {len(stderr_for_log) - 100000} chars) ...\n\n{stderr_for_log[-50000:]}"
864
+ else:
865
+ # Truncate successful job output to save space
866
+ stdout_for_log = _truncate(stdout_for_log)
867
+ stderr_for_log = _truncate(stderr_for_log)
868
+
869
+ execution_log = JobExecutionLog(
870
+ log_id=f"log_{uuid4().hex[:12]}",
871
+ job_id=job_id,
872
+ command=command,
873
+ working_directory=working_directory,
874
+ returncode=summary.returncode,
875
+ stdout=stdout_for_log,
876
+ stderr=stderr_for_log,
877
+ python_executable=python_executable,
878
+ environment_keys=environment_keys,
879
+ )
880
+ session.add(execution_log)
881
+ logger.info(
882
+ "Created execution log for job %s: returncode=%d, stdout_len=%d (stored: %d), stderr_len=%d (stored: %d)%s",
883
+ job_id,
884
+ summary.returncode,
885
+ len(summary.stdout or ""),
886
+ len(stdout_for_log),
887
+ len(summary.stderr or ""),
888
+ len(stderr_for_log),
889
+ " [FULL ERROR STORED]" if not success else "",
890
+ )
891
+
892
+ if success:
893
+ # Only set job.result for successful jobs to prevent stale data from previous runs
894
+ job.result = summary.to_dict()
895
+ job.status = ExperimentJobStatus.COMPLETED
896
+ persist_trials_from_summary(session, job, summary)
897
+ if experiment:
898
+ update_experiment_metadata(experiment, summary)
899
+
900
+ # ✅ ADD: Update status_json with final stats from backend job metadata
901
+ if job.backend_job_id:
902
+ try:
903
+ import requests
904
+
905
+ from .service import update_job_status
906
+
907
+ # Fetch backend job metadata
908
+ config = load_config()
909
+ backend_url = config.backend_url
910
+ # Load API key from .env - fail loudly if not found
911
+ try:
912
+ api_key = _load_synth_api_key()
913
+ except RuntimeError as e:
914
+ logger.error(str(e))
915
+ raise
916
+
917
+ if backend_url and api_key:
918
+ url = f"{backend_url.rstrip('/')}/prompt-learning/online/jobs/{job.backend_job_id}"
919
+ headers = {"Authorization": f"Bearer {api_key}"}
920
+ resp = requests.get(url, headers=headers, timeout=60.0) # Increased from 10s to 60s to handle backend overload
921
+
922
+ if resp.status_code == 200:
923
+ backend_job = resp.json()
924
+ backend_metadata = backend_job.get("metadata", {})
925
+ backend_stats = backend_metadata.get("stats", {})
926
+
927
+ if backend_stats:
928
+ # Update status_json with final stats (including scores for result extraction)
929
+ status_update = {
930
+ "trials_tried": backend_stats.get("trials_tried"),
931
+ "total_tokens": backend_stats.get("total_tokens"),
932
+ "total_rollouts": backend_stats.get("total_rollouts"),
933
+ "optimization_rollouts_executed": backend_stats.get("optimization_rollouts_executed"),
934
+ "validation_rollouts_executed": backend_stats.get("validation_rollouts_executed"),
935
+ "optimization_trials_evaluated": backend_stats.get("optimization_trials_evaluated"),
936
+ "validation_trials_evaluated": backend_stats.get("validation_trials_evaluated"),
937
+ # CRITICAL: Store scores for result extraction (if backend job returns 404 later)
938
+ "baseline_score": backend_stats.get("baseline_score"),
939
+ "best_score": backend_stats.get("best_score") or backend_stats.get("best_validation_score"),
940
+ "total_time_seconds": backend_stats.get("total_time_seconds"),
941
+ "eval_seeds_n": backend_stats.get("eval_seeds_n"),
942
+ "transformations_evaluated": backend_stats.get("transformations_evaluated"),
943
+ }
944
+ # Remove None values
945
+ status_update = {k: v for k, v in status_update.items() if v is not None}
946
+ # ✅ ADD: Assertion to ensure we have at least some stats
947
+ assert len(status_update) > 0, f"status_update must not be empty for job {job_id}"
948
+ if status_update:
949
+ update_job_status(job_id, status_update)
950
+ logger.info(
951
+ "Updated status_json with final stats for job %s: %s",
952
+ job_id,
953
+ status_update,
954
+ )
955
+ except Exception as e:
956
+ # Log but don't fail job finalization if stats update fails
957
+ logger.warning(
958
+ "Failed to update status_json with final stats for job %s: %s",
959
+ job_id,
960
+ e,
961
+ )
962
+ else:
963
+ # Job failed - clear job.result to prevent stale data from previous successful runs
964
+ job.result = None
965
+ job.status = ExperimentJobStatus.FAILED
966
+ # Store full error message (truncate to 100k chars max to avoid DB issues, but keep full context)
967
+ full_error = error_message or summary.stderr or "Job failed"
968
+ if len(full_error) > 100000:
969
+ # Keep first 50k and last 50k chars
970
+ full_error = f"{full_error[:50000]}\n\n... (truncated {len(full_error) - 100000} chars) ...\n\n{full_error[-50000:]}"
971
+ job.error = full_error
972
+ if experiment:
973
+ # Don't immediately mark experiment as failed - let remaining jobs continue
974
+ # The experiment will be marked as failed only if all jobs fail
975
+ logger.warning(
976
+ "Job %s failed for experiment %s, but allowing remaining jobs to continue",
977
+ job_id,
978
+ experiment.experiment_id,
979
+ )
980
+
981
+ session.flush()
982
+
983
+ if experiment:
984
+ remaining = _jobs_remaining(session, experiment.experiment_id)
985
+ if remaining == 0:
986
+ # All jobs completed - check if experiment succeeded or failed
987
+ all_jobs = (
988
+ session.query(ExperimentJob)
989
+ .filter(ExperimentJob.experiment_id == experiment.experiment_id)
990
+ .all()
991
+ )
992
+ all_failed = all(
993
+ job.status == ExperimentJobStatus.FAILED for job in all_jobs
994
+ )
995
+ if all_failed:
996
+ experiment.status = ExperimentStatus.FAILED
997
+ experiment.error = (
998
+ all_jobs[0].error if all_jobs else "All jobs failed"
999
+ )
1000
+ else:
1001
+ experiment.status = ExperimentStatus.COMPLETED
1002
+ experiment.completed_at = datetime.now(UTC)
1003
+ else:
1004
+ # Dispatch remaining jobs (periodic task will also handle this as backup)
1005
+ dispatch_available_jobs(session, experiment.experiment_id)
1006
+
1007
+ return summary.to_dict()
1008
+
1009
+
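A sketch of how a worker reports an outcome to _finalize_job (all argument values are illustrative); note that the parent experiment is only marked FAILED once every one of its jobs has failed, otherwise the remaining queued jobs keep being dispatched:

    import sys

    summary = ResultSummary()           # in the real task this is filled from the subprocess
    _finalize_job(
        "job_123",
        summary=summary,
        success=False,
        error_message="health check failed before training started",  # hypothetical
        command="python -m synth_ai.cli train --type prompt_learning ...",
        working_directory="/tmp/experiments/job_123",
        python_executable=sys.executable,
        environment_keys=["SYNTH_API_KEY"],
    )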
1010
+ @celery_app.task(bind=True, name="synth_ai.cli.local.experiment_queue.run_experiment_job")
1011
+ def run_experiment_job(self, job_id: str) -> dict[str, Any] | None:
1012
+ """Celery task entrypoint for running a prompt learning experiment job.
1013
+
1014
+ This is the main Celery task that executes prompt learning jobs. It:
1015
+ 1. Marks the job as RUNNING
1016
+ 2. Prepares the config file (applies overrides)
1017
+ 3. Builds and executes the training command via subprocess
1018
+ 4. Collects results (stdout, stderr, metrics, artifacts)
1019
+ 5. Finalizes the job (updates status, persists results)
1020
+
1021
+ Args:
1022
+ self: Celery task instance (bound task)
1023
+ job_id: Job identifier from the experiment queue database
1024
+
1025
+ Returns:
1026
+ Result summary dictionary if successful, None if job not found
1027
+
1028
+ Raises:
1029
+ AssertionError: If inputs are invalid (should not happen in production)
1030
+
1031
+ Note:
1032
+ The task runs the training command (`synth-ai train --type prompt_learning`)
1033
+ as a subprocess and captures stdout/stderr. Health check failures and
1034
+ authentication errors are detected and cause job failure even if returncode is 0.
1035
+ """
1036
+ # Validate input
1037
+ assert isinstance(job_id, str), (
1038
+ f"job_id must be str, got {type(job_id).__name__}: {job_id}"
1039
+ )
1040
+ assert job_id, "job_id cannot be empty"
1041
+
1042
+ job = _mark_job_running(job_id, getattr(self.request, "id", None))
1043
+ if not job:
1044
+ logger.warning("Job %s not found or could not be marked as running", job_id)
1045
+ return None
1046
+
1047
+ # Validate job object
1048
+ assert isinstance(job, ExperimentJob), (
1049
+ f"_mark_job_running must return ExperimentJob, got {type(job).__name__}"
1050
+ )
1051
+ assert job.job_id == job_id, (
1052
+ f"Job ID mismatch: expected {job_id}, got {job.job_id}"
1053
+ )
1054
+ assert job.status == ExperimentJobStatus.RUNNING, (
1055
+ f"Job status must be RUNNING, got {job.status}"
1056
+ )
1057
+
1058
+ summary = ResultSummary()
1059
+ prepared: PreparedConfig | None = None
1060
+ success = False
1061
+ error_message: str | None = None # Will be set if training fails
1062
+ cmd: list[str] | None = None # Store command for execution logging
1063
+ env: dict[str, str] | None = None # Store environment for execution logging
1064
+
1065
+ # Initialize status tracker
1066
+ assert job.job_id, "job.job_id cannot be empty"
1067
+ status_tracker = ExperimentStatusTracker(job.job_id)
1068
+ assert status_tracker.job_id == job.job_id, (
1069
+ f"Status tracker job_id mismatch: expected {job.job_id}, got {status_tracker.job_id}"
1070
+ )
1071
+
1072
+ job_start_time = time.time()
1073
+ assert job_start_time > 0, f"job_start_time must be > 0, got {job_start_time}"
1074
+
1075
+ policy: str | None = None
1076
+ environment: str | None = None
1077
+
1078
+     try:
+         # Validate config_path
+         assert job.config_path, "job.config_path cannot be empty"
+         assert isinstance(job.config_path, str), (
+             f"job.config_path must be str, got {type(job.config_path).__name__}"
+         )
+
+         # Validate config_overrides
+         if job.config_overrides is not None:
+             assert isinstance(job.config_overrides, dict), (
+                 f"job.config_overrides must be dict, got {type(job.config_overrides).__name__}"
+             )
+
+         prepared = prepare_config_file(job.config_path, job.config_overrides or {})
+         assert prepared is not None, "prepare_config_file returned None"
+         assert isinstance(prepared, PreparedConfig), (
+             f"prepare_config_file must return PreparedConfig, got {type(prepared).__name__}"
+         )
+         assert prepared.path.exists(), (
+             f"Prepared config file must exist: {prepared.path}"
+         )
+
+         # Extract policy and environment from config
+         policy, environment = extract_config_info(prepared.path)
+         assert isinstance(policy, str | type(None)), (
+             f"policy must be str | None, got {type(policy).__name__}: {policy}"
+         )
+         assert isinstance(environment, str | type(None)), (
+             f"environment must be str | None, got {type(environment).__name__}: {environment}"
+         )
+
+         # Extract model/provider from override FIRST (override takes precedence)
+         model_override = None
+         provider_override = None
+         if job.config_overrides:
+             model_override = job.config_overrides.get("prompt_learning.policy.model")
+             provider_override = job.config_overrides.get("prompt_learning.policy.provider")
+
+         # Use override if available, otherwise use extracted
+         final_model = model_override or policy
+         final_provider = provider_override
+
+         # ASSERT: Verify overrides were applied by checking the prepared config
+         if job.config_overrides:
+             rollout_budget_override = job.config_overrides.get("prompt_learning.gepa.rollout.budget")
+             max_rollouts_override = job.config_overrides.get("prompt_learning.termination_config.max_rollouts")
+
+             # Assert model override matches extracted policy
+             if model_override:
+                 assert policy == model_override, (
+                     f"CRITICAL: Policy model mismatch for job {job.job_id}: "
+                     f"override={model_override!r} but extracted={policy!r}. "
+                     f"This indicates the override wasn't applied correctly to the prepared config. "
+                     f"Config path: {prepared.path}"
+                 )
+                 logger.info(
+                     "✅ Config override verified for job %s: model=%s matches extracted policy",
+                     job.job_id,
+                     model_override,
+                 )
+
+             # Assert provider override if specified
+             if provider_override:
+                 # Extract provider from prepared config
+                 import tomllib
+                 with open(prepared.path, "rb") as f:
+                     prepared_config = tomllib.load(f)
+                 pl_section = prepared_config.get("prompt_learning", {})
+                 policy_section = pl_section.get("policy", {})
+                 extracted_provider = policy_section.get("provider") if isinstance(policy_section, dict) else None
+                 if extracted_provider:
+                     assert extracted_provider == provider_override, (
+                         f"CRITICAL: Provider mismatch for job {job.job_id}: "
+                         f"override={provider_override!r} but extracted={extracted_provider!r}. "
+                         f"Config path: {prepared.path}"
+                     )
+
+             # Assert rollout budget override if specified
+             if rollout_budget_override is not None:
+                 import tomllib
+                 with open(prepared.path, "rb") as f:
+                     prepared_config = tomllib.load(f)
+                 pl_section = prepared_config.get("prompt_learning", {})
+                 gepa_section = pl_section.get("gepa", {})
+                 rollout_section = gepa_section.get("rollout", {}) if isinstance(gepa_section, dict) else {}
+                 extracted_budget = rollout_section.get("budget") if isinstance(rollout_section, dict) else None
+                 if extracted_budget is not None:
+                     assert extracted_budget == rollout_budget_override, (
+                         f"CRITICAL: Rollout budget mismatch for job {job.job_id}: "
+                         f"override={rollout_budget_override} but extracted={extracted_budget}. "
+                         f"Config path: {prepared.path}"
+                     )
+
+             # Assert max_rollouts override if specified
+             if max_rollouts_override is not None:
+                 import tomllib
+                 with open(prepared.path, "rb") as f:
+                     prepared_config = tomllib.load(f)
+                 pl_section = prepared_config.get("prompt_learning", {})
+                 termination_section = pl_section.get("termination_config", {})
+                 extracted_max_rollouts = termination_section.get("max_rollouts") if isinstance(termination_section, dict) else None
+                 if extracted_max_rollouts is not None:
+                     assert extracted_max_rollouts == max_rollouts_override, (
+                         f"CRITICAL: Max rollouts mismatch for job {job.job_id}: "
+                         f"override={max_rollouts_override} but extracted={extracted_max_rollouts}. "
+                         f"Config path: {prepared.path}"
+                     )
+
+         if final_model or environment:
+             # Build policy string with provider if available
+             policy_str = f"{final_provider}/{final_model}" if final_provider and final_model else final_model
+             status_tracker.update(policy=policy_str, environment=environment)
+             logger.info(
+                 "📊 Experiment config for job %s: policy=%s, environment=%s",
+                 job.job_id,
+                 policy or "unknown",
+                 environment or "unknown",
+             )
+
+         cmd = _build_train_command(str(prepared.path))
+         assert isinstance(cmd, list), (
+             f"_build_train_command must return list, got {type(cmd).__name__}"
+         )
+         # Store cmd for execution logging (needed at end of function)
+         assert len(cmd) > 0, "Command list cannot be empty"
+         assert all(isinstance(arg, str) for arg in cmd), (
+             f"All command arguments must be str, got types: {[type(arg).__name__ for arg in cmd]}"
+         )
+         logger.info("Executing job %s via command: %s", job.job_id, " ".join(cmd))
+
+         # Run command with unbuffered output to see errors immediately
+         env = os.environ.copy()
+         assert isinstance(env, dict), (
+             f"os.environ.copy() must return dict, got {type(env).__name__}"
+         )
+         env["PYTHONUNBUFFERED"] = "1"
+
+         # Log authentication status BEFORE running command
+         synth_key = env.get("SYNTH_API_KEY")
+         env_key = env.get("ENVIRONMENT_API_KEY")
+         logger.info(
+             "🔐 Authentication status for job %s:\n"
+             " SYNTH_API_KEY: %s\n"
+             " ENVIRONMENT_API_KEY: %s",
+             job.job_id,
+             f"{synth_key[:8]}...{synth_key[-4:]}" if synth_key and len(synth_key) > 12 else "(NOT SET)",
+             f"{env_key[:8]}...{env_key[-4:]}" if env_key and len(env_key) > 12 else "(NOT SET)",
+         )
+
+         logger.info(
+             "🚀 Starting subprocess for job %s:\n"
+             " Command: %s\n"
+             " Working directory: %s\n"
+             " Python executable: %s\n"
+             " Environment keys: %s",
+             job.job_id,
+             " ".join(cmd),
+             os.getcwd(),
+             env.get("PYTHON", "python"),
+             ", ".join(sorted([k for k in env if "API" in k or "KEY" in k])),
+         )
+
+         # Get backend URL and API key for progress polling
+         config = load_config()
+         assert config is not None, "load_config() returned None"
+         backend_url = config.backend_url
+         assert isinstance(backend_url, str), (
+             f"config.backend_url must be str, got {type(backend_url).__name__}"
+         )
+         assert backend_url.startswith(("http://", "https://")), (
+             f"backend_url must start with http:// or https://, got {backend_url}"
+         )
+
+         # Get API key from .env file - fail loudly if not found
+         # This is needed for the poller thread, which runs in the worker process
+         try:
+             api_key = _load_synth_api_key()
+         except RuntimeError as e:
+             logger.error(str(e))
+             raise
+
+         # Start background progress poller (will be started once we have backend_job_id)
+         poller_stop = threading.Event()
+         assert poller_stop is not None, "threading.Event() returned None"
+         poller_thread: threading.Thread | None = None
+         backend_job_id: str | None = None
+
+         try:
+             # Stream subprocess output line-by-line to extract backend_job_id and parse progress
+             process = subprocess.Popen(
+                 cmd,
+                 stdout=subprocess.PIPE,
+                 stderr=subprocess.STDOUT,
+                 text=True,
+                 env=env,
+                 bufsize=1,  # Line buffered
+             )
+             assert process is not None, "subprocess.Popen() returned None"
+             assert process.stdout is not None, "process.stdout is None"
+
+             stdout_lines: list[str] = []
+             accumulated_output = ""  # Accumulate output for better pattern matching
+             last_status_update_time = job_start_time
+             status_update_interval = 5.0  # Update status_json every 5 seconds even without progress
+             assert status_update_interval > 0, (
+                 f"status_update_interval must be > 0, got {status_update_interval}"
+             )
+
+             # Read output line-by-line with timeout protection
+             # If subprocess crashes immediately, we need to ensure we capture the error
+             try:
+                 # Read output line-by-line
+                 for line in process.stdout:
+                     assert isinstance(line, str), (
+                         f"process.stdout line must be str, got {type(line).__name__}"
+                     )
+                     stdout_lines.append(line)
+                     assert isinstance(accumulated_output, str), (
+                         f"accumulated_output must be str, got {type(accumulated_output).__name__}"
+                     )
+                     accumulated_output += line
+                     assert len(accumulated_output) >= len(line), (
+                         f"accumulated_output length should increase, got {len(accumulated_output)} < {len(line)}"
+                     )
+
+                     # Try to extract backend_job_id from output
+                     if not backend_job_id:
+                         extracted_id = _extract_backend_job_id(line)
+                         if extracted_id:
+                             # Assert extracted ID is valid before using it
+                             assert extracted_id.startswith("pl_"), (
+                                 f"Invalid backend_job_id format: {extracted_id}"
+                             )
+                             assert len(extracted_id) > 3, (
+                                 f"Backend job ID too short: {extracted_id}"
+                             )
+
+                             backend_job_id = extracted_id
+                             logger.info("📋 Extracted backend job ID: %s", backend_job_id)
+
+                             # ✅ ADD: Store backend_job_id in status_json for debugging
+                             status_tracker.update(custom_fields={"backend_job_id": backend_job_id})
+                             logger.info("📋 Stored backend_job_id in status_json for job %s", job.job_id)
+
+                             # Update job with backend_job_id
+                             with session_scope() as session:
+                                 db_job = session.get(ExperimentJob, job.job_id)
+                                 if db_job:
+                                     db_job.backend_job_id = backend_job_id
+                                     session.commit()
+
+                             # Start progress poller now that we have backend_job_id
+                             # API key should already be loaded and validated above
+                             if not api_key:
+                                 raise RuntimeError(
+                                     f"❌ SYNTH_API_KEY not available for job {job.job_id}. "
+                                     "This should have been caught earlier - API key loading failed."
+                                 )
+                             elif not backend_url:
+                                 logger.warning(
+                                     "⚠️ Cannot start progress poller for job %s: backend_url not configured. "
+                                     "Progress updates will not be available, but job will continue.",
+                                     job.job_id,
+                                 )
+                             elif backend_job_id and not backend_job_id.startswith("pl_"):
+                                 logger.warning(
+                                     "⚠️ Cannot start progress poller for job %s: invalid backend_job_id format: %s. "
+                                     "Progress updates will not be available, but job will continue.",
+                                     job.job_id,
+                                     backend_job_id,
+                                 )
+
+                             if api_key and backend_url and backend_job_id and backend_job_id.startswith("pl_"):
+                                 # Validate all inputs before starting thread
+                                 assert isinstance(backend_job_id, str), (
+                                     f"backend_job_id must be str, got {type(backend_job_id).__name__}"
+                                 )
+                                 assert isinstance(status_tracker, ExperimentStatusTracker), (
+                                     f"status_tracker must be ExperimentStatusTracker, got {type(status_tracker).__name__}"
+                                 )
+                                 assert isinstance(backend_url, str), (
+                                     f"backend_url must be str, got {type(backend_url).__name__}"
+                                 )
+                                 assert isinstance(api_key, str), (
+                                     f"api_key must be str, got {type(api_key).__name__}"
+                                 )
+                                 assert poller_stop is not None, "poller_stop cannot be None"
+
+                                 poller_thread = threading.Thread(
+                                     target=_poll_backend_progress,
+                                     args=(
+                                         backend_job_id,
+                                         status_tracker,
+                                         policy,
+                                         environment,
+                                         backend_url,
+                                         api_key,
+                                         poller_stop,
+                                         job_start_time,  # Pass job start time for rollouts/min calculation
+                                     ),
+                                     daemon=True,
+                                 )
+                                 assert poller_thread is not None, "threading.Thread() returned None"
+                                 poller_thread.start()
+                                 assert poller_thread.is_alive() or not poller_thread.is_alive(), (
+                                     "Thread should be startable"
+                                 )
+                                 logger.info("📡 Started progress poller for backend job %s", backend_job_id)
+                             else:
+                                 logger.warning(
+                                     "Cannot start progress poller: missing API key or backend URL"
+                                 )
+
+                     # Parse accumulated output for progress updates (fallback if API polling fails)
+                     # Use accumulated output (not just current line) for better pattern matching
+                     # Update status_json periodically even without progress data to show elapsed time
+                     current_time = time.time()
+                     assert current_time >= job_start_time, (
+                         f"current_time ({current_time}) < job_start_time ({job_start_time})"
+                     )
+                     assert isinstance(accumulated_output, str), (
+                         f"accumulated_output must be str, got {type(accumulated_output).__name__}"
+                     )
+
+                     should_update = (
+                         # Update if we find progress patterns
+                         "rollouts=" in line.lower() or
+                         "progress:" in line.lower() or
+                         "gepa progress:" in line.lower() or
+                         # Or update periodically (every 5 seconds) to show elapsed time
+                         (current_time - last_status_update_time) >= status_update_interval
+                     )
+                     assert isinstance(should_update, bool), (
+                         f"should_update must be bool, got {type(should_update).__name__}"
+                     )
+
+                     if should_update:
+                         # Validate accumulated_output before parsing
+                         assert len(accumulated_output) > 0, "accumulated_output cannot be empty"
+                         output_to_parse = accumulated_output[-5000:]  # Last 5KB to avoid parsing huge outputs
+                         assert isinstance(output_to_parse, str), (
+                             f"output_to_parse must be str, got {type(output_to_parse).__name__}"
+                         )
+                         assert len(output_to_parse) <= len(accumulated_output), (
+                             f"output_to_parse length ({len(output_to_parse)}) > accumulated_output length ({len(accumulated_output)})"
+                         )
+
+                         update_status_from_output(
+                             status_tracker,
+                             output_to_parse,
+                             policy=policy,
+                             environment=environment,
+                             start_time=job_start_time,
+                         )
+                         last_status_update_time = current_time
+                         assert last_status_update_time >= job_start_time, (
+                             f"last_status_update_time ({last_status_update_time}) < job_start_time ({job_start_time})"
+                         )
+             except (BrokenPipeError, OSError) as e:
+                 # Subprocess may have crashed - log and continue to wait() to get returncode
+                 logger.warning(
+                     "Error reading subprocess stdout for job %s (process may have crashed): %s",
+                     job.job_id,
+                     e,
+                 )
+                 # Continue to process.wait() to get the returncode and any buffered output
+
+             # Wait for process to complete (ALWAYS wait, even if stdout reading failed)
+             assert process is not None, "process is None before wait()"
+             returncode = process.wait()
+
+             # If stdout reading failed but process exited, try to read any remaining buffered output
+             if process.stdout and not stdout_lines:
+                 try:
+                     remaining_output = process.stdout.read()
+                     if remaining_output:
+                         stdout_lines.append(remaining_output)
+                         accumulated_output += remaining_output
+                         logger.info(
+                             "Captured remaining subprocess output for job %s after process exit: %d bytes",
+                             job.job_id,
+                             len(remaining_output),
+                         )
+                 except Exception as e:
+                     logger.warning(
+                         "Failed to read remaining subprocess output for job %s: %s",
+                         job.job_id,
+                         e,
+                     )
+             assert isinstance(returncode, int), (
+                 f"process.wait() must return int, got {type(returncode).__name__}: {returncode}"
+             )
+
+             # Combine output
+             assert isinstance(stdout_lines, list), (
+                 f"stdout_lines must be list, got {type(stdout_lines).__name__}"
+             )
+             assert all(isinstance(line, str) for line in stdout_lines), (
+                 f"All stdout_lines must be str, got types: {[type(line).__name__ for line in stdout_lines[:5]]}"
+             )
+
+             stdout = "".join(stdout_lines)
+             assert isinstance(stdout, str), (
+                 f"stdout must be str, got {type(stdout).__name__}"
+             )
+             stderr = ""  # stderr is redirected to stdout
+             assert isinstance(stderr, str), (
+                 f"stderr must be str, got {type(stderr).__name__}"
+             )
+
+             # CRITICAL: If subprocess failed but we have no output, log a warning
+             # This indicates the subprocess crashed before producing any output
+             if returncode != 0 and not stdout:
+                 logger.error(
+                     "❌ Subprocess for job %s exited with code %d but produced NO output. "
+                     "This usually indicates an immediate crash (import error, syntax error, etc.). "
+                     "Command: %s",
+                     job.job_id,
+                     returncode,
+                     " ".join(cmd),
+                 )
+                 # Set a helpful error message
+                 stdout = (
+                     f"[ERROR] Subprocess crashed immediately with exit code {returncode}. "
+                     f"No output captured. This usually indicates:\n"
+                     f" 1. Import error (missing module)\n"
+                     f" 2. Syntax error in Python code\n"
+                     f" 3. Missing executable or PATH issue\n"
+                     f" 4. Permission error\n"
+                     f"\nCommand: {' '.join(cmd)}\n"
+                     f"Working directory: {os.getcwd()}\n"
+                     f"Python: {env.get('PYTHON', 'python')}"
+                 )
+
+             # Create CompletedProcess-like object for compatibility
+             class CompletedProcess:
+                 def __init__(self, returncode: int, stdout: str, stderr: str):
+                     assert isinstance(returncode, int), (
+                         f"returncode must be int, got {type(returncode).__name__}"
+                     )
+                     assert isinstance(stdout, str), (
+                         f"stdout must be str, got {type(stdout).__name__}"
+                     )
+                     assert isinstance(stderr, str), (
+                         f"stderr must be str, got {type(stderr).__name__}"
+                     )
+                     self.returncode = returncode
+                     self.stdout = stdout
+                     self.stderr = stderr
+
+             completed = CompletedProcess(returncode, stdout, stderr)
+             assert isinstance(completed, CompletedProcess), (
+                 f"CompletedProcess() must return CompletedProcess, got {type(completed).__name__}"
+             )
+
+             logger.info(
+                 "✅ Subprocess completed for job %s:\n"
+                 " Return code: %s\n"
+                 " Stdout length: %d chars\n"
+                 " Stderr length: %d chars",
+                 job.job_id,
+                 completed.returncode,
+                 len(completed.stdout) if completed.stdout else 0,
+                 len(completed.stderr) if completed.stderr else 0,
+             )
+
+             # Final status update from complete output
+             assert isinstance(completed.stdout, str), (
+                 f"completed.stdout must be str before final update, got {type(completed.stdout).__name__}"
+             )
+             assert len(completed.stdout) > 0 or len(accumulated_output) > 0, (
+                 "Must have some output for final status update"
+             )
+
+             # Use accumulated_output if available (more complete), otherwise stdout
+             final_output = accumulated_output if accumulated_output else completed.stdout
+             assert isinstance(final_output, str), (
+                 f"final_output must be str, got {type(final_output).__name__}"
+             )
+
+             update_status_from_output(
+                 status_tracker,
+                 final_output,
+                 policy=policy,
+                 environment=environment,
+                 start_time=job_start_time,
+             )
+         except subprocess.TimeoutExpired as e:
+             logger.error("⏱️ Subprocess TIMEOUT for job %s after %s seconds", job.job_id, e.timeout)
+             raise
+         except Exception as e:
+             logger.error(
+                 "❌ Subprocess EXCEPTION for job %s:\n"
+                 " Type: %s\n"
+                 " Message: %s",
+                 job.job_id,
+                 type(e).__name__,
+                 str(e),
+                 exc_info=True,
+             )
+             raise
+         finally:
+             # Stop progress poller
+             if poller_thread and poller_thread.is_alive():
+                 poller_stop.set()
+                 poller_thread.join(timeout=5)
+                 logger.info("📡 Stopped progress poller for job %s", job.job_id)
+
+         # Log full output for debugging - prioritize auth errors
+         logger.info("Training command returncode: %s", completed.returncode)
+
+         # Check for critical errors FIRST - these should cause failure even if returncode is 0
+         stdout_lower = (completed.stdout or "").lower()
+         stderr_lower = (completed.stderr or "").lower()
+         combined_output = (completed.stdout or "") + "\n" + (completed.stderr or "")
+         combined_lower = combined_output.lower()
+
+         # Check for health check failures (common cause of silent failures)
+         health_check_failures = []
+         health_check_details = []
+         if "health check failed" in combined_lower or "aborting due to failing health check" in combined_lower:
+             # Extract full context around health check failure - look for error patterns
+             for source_name, source_text in [("STDOUT", completed.stdout), ("STDERR", completed.stderr)]:
+                 if not source_text:
+                     continue
+                 source_lower = source_text.lower()
+                 if "health check" in source_lower:
+                     # Find health check failure message
+                     idx = source_lower.find("health check")
+                     start = max(0, idx - 200)
+                     end = min(len(source_text), idx + 500)
+                     health_check_failures.append(f"{source_name} (health check context):\n{source_text[start:end]}")
+
+                 # Also look for error patterns that might explain WHY it failed
+                 # Look for HTTP status codes, error messages, exceptions
+                 if "500" in source_text or "internal server error" in source_lower:
+                     # Find the 500 error context
+                     error_idx = source_lower.find("500") if "500" in source_text else source_lower.find("internal server error")
+                     if error_idx >= 0:
+                         error_start = max(0, error_idx - 100)
+                         error_end = min(len(source_text), error_idx + 800)
+                         health_check_details.append(f"{source_name} (500 error details):\n{source_text[error_start:error_end]}")
+
+                 # Look for tracebacks or exception messages
+                 if "traceback" in source_lower or "exception" in source_lower or "error:" in source_lower:
+                     # Find traceback/exception
+                     tb_idx = source_lower.find("traceback") if "traceback" in source_lower else (
+                         source_lower.find("exception") if "exception" in source_lower else source_lower.find("error:")
+                     )
+                     if tb_idx >= 0:
+                         tb_start = max(0, tb_idx - 50)
+                         tb_end = min(len(source_text), tb_idx + 1500)  # Get more context for tracebacks
+                         health_check_details.append(f"{source_name} (exception/traceback):\n{source_text[tb_start:tb_end]}")
+
+                 # Look for specific error messages like "ModuleNotFoundError", "RuntimeError", etc.
+                 error_patterns = [
+                     r"(ModuleNotFoundError|ImportError|RuntimeError|ValueError|KeyError|AttributeError)[^\n]*",
+                     r"Failed to [^\n]+",
+                     r"Unable to [^\n]+",
+                     r"Missing [^\n]+",
+                 ]
+                 for pattern in error_patterns:
+                     matches = re.finditer(pattern, source_text, re.IGNORECASE | re.MULTILINE)
+                     for match in matches:
+                         match_start = max(0, match.start() - 100)
+                         match_end = min(len(source_text), match.end() + 300)
+                         health_check_details.append(f"{source_name} (error pattern '{pattern[:30]}...'):\n{source_text[match_start:match_end]}")
+
+         if health_check_failures:
+             success = False
+             # Build informative error message
+             error_parts = [
+                 "Training command failed health check. Task app endpoint returned error.",
+             ]
+             if health_check_details:
+                 error_parts.append("See details below for root cause.")
+             else:
+                 error_parts.append("Check task app logs and ensure /task_info endpoint is working.")
+
+             error_message = " ".join(error_parts)
+
+             logger.error(
+                 "🚨 HEALTH CHECK FAILURE for job %s:\n%s",
+                 job.job_id,
+                 "\n".join(health_check_failures),
+             )
+
+             if health_check_details:
+                 logger.error(
+                     "🔍 ROOT CAUSE ANALYSIS for job %s:\n%s",
+                     job.job_id,
+                     "\n" + "="*80 + "\n".join(health_check_details) + "\n" + "="*80,
+                 )
+
+         # Check for authentication-related errors
+         auth_keywords = [
+             "authentication",
+             "authorization",
+             "api key",
+             "api_key",
+             "missing api",
+             "invalid api",
+             "unauthorized",
+             "forbidden",
+             "401",
+             "403",
+             "missing",
+             "not set",
+             "required",
+         ]
+
+         auth_errors = []
+         for keyword in auth_keywords:
+             if keyword in stdout_lower:
+                 # Extract context around the keyword
+                 idx = stdout_lower.find(keyword)
+                 start = max(0, idx - 100)
+                 end = min(len(completed.stdout), idx + 200)
+                 auth_errors.append(f"STDOUT: ...{completed.stdout[start:end]}...")
+             if keyword in stderr_lower:
+                 idx = stderr_lower.find(keyword)
+                 start = max(0, idx - 100)
+                 end = min(len(completed.stderr), idx + 200)
+                 auth_errors.append(f"STDERR: ...{completed.stderr[start:end]}...")
+
+         if auth_errors:
+             logger.error(
+                 "🚨 AUTHENTICATION ERRORS DETECTED for job %s:\n%s",
+                 job.job_id,
+                 "\n".join(auth_errors),
+             )
+
+         # Log full output (especially important for errors)
+         if completed.stdout:
+             if not success:
+                 # For errors, log full output
+                 logger.error("Training command stdout (FULL, %d chars):\n%s", len(completed.stdout), completed.stdout)
+             else:
+                 # For success, log last 2000 chars
+                 logger.info("Training command stdout (last 2000 chars):\n%s", completed.stdout[-2000:])
+         else:
+             logger.warning("Training command stdout is EMPTY - command may have exited before producing output")
+
+         if completed.stderr:
+             if not success:
+                 # For errors, log full output
+                 logger.error("Training command stderr (FULL, %d chars):\n%s", len(completed.stderr), completed.stderr)
+             else:
+                 # For success, log last 2000 chars
+                 logger.warning("Training command stderr (last 2000 chars):\n%s", completed.stderr[-2000:])
+         else:
+             logger.info("Training command stderr is empty")
+         # Validate inputs before collecting results
+         assert prepared is not None, "prepared cannot be None"
+         assert isinstance(prepared, PreparedConfig), (
+             f"prepared must be PreparedConfig, got {type(prepared).__name__}"
+         )
+         assert isinstance(prepared.results_folder, Path), (
+             f"prepared.results_folder must be Path, got {type(prepared.results_folder).__name__}"
+         )
+         assert isinstance(completed.stdout, str), (
+             f"completed.stdout must be str, got {type(completed.stdout).__name__}"
+         )
+         assert isinstance(completed.stderr, str), (
+             f"completed.stderr must be str, got {type(completed.stderr).__name__}"
+         )
+
+         artifact_summary = collect_result_summary(
+             prepared.results_folder,
+             stdout=completed.stdout,
+             stderr=completed.stderr,
+         )
+         assert isinstance(artifact_summary, ResultSummary), (
+             f"collect_result_summary must return ResultSummary, got {type(artifact_summary).__name__}"
+         )
+
+         artifact_summary.stdout = _truncate(completed.stdout)
+         assert isinstance(artifact_summary.stdout, str), (
+             f"artifact_summary.stdout must be str after truncate, got {type(artifact_summary.stdout).__name__}"
+         )
+         artifact_summary.stderr = _truncate(completed.stderr)
+         assert isinstance(artifact_summary.stderr, str), (
+             f"artifact_summary.stderr must be str after truncate, got {type(artifact_summary.stderr).__name__}"
+         )
+         artifact_summary.returncode = completed.returncode
+         assert isinstance(artifact_summary.returncode, int), (
+             f"artifact_summary.returncode must be int, got {type(artifact_summary.returncode).__name__}"
+         )
+         summary = artifact_summary
+         assert isinstance(summary, ResultSummary), (
+             f"summary must be ResultSummary, got {type(summary).__name__}"
+         )
+
+         # ✅ FIX: If summary.total_rollouts is None, try to fetch from backend metadata stats
+         # This handles cases where CLI output parsing fails but backend has accurate stats
+         if summary.total_rollouts is None and backend_job_id:
+             try:
+                 import requests
+
+                 config = load_config()
+                 backend_url = config.backend_url
+                 try:
+                     api_key = _load_synth_api_key()
+                 except RuntimeError:
+                     api_key = None
+
+                 if backend_url and api_key:
+                     url = f"{backend_url.rstrip('/')}/prompt-learning/online/jobs/{backend_job_id}"
+                     headers = {"Authorization": f"Bearer {api_key}"}
+                     resp = requests.get(url, headers=headers, timeout=10.0)
+
+                     if resp.status_code == 200:
+                         backend_job = resp.json()
+                         backend_metadata = backend_job.get("metadata", {})
+                         backend_stats = backend_metadata.get("stats", {})
+
+                         # Try to get total_rollouts from backend stats
+                         # Prefer total_rollouts, fallback to sum of optimization + validation rollouts
+                         backend_total_rollouts = backend_stats.get("total_rollouts")
+                         if backend_total_rollouts is None:
+                             opt_rollouts = backend_stats.get("optimization_rollouts_executed", 0) or 0
+                             val_rollouts = backend_stats.get("validation_rollouts_executed", 0) or 0
+                             if opt_rollouts > 0 or val_rollouts > 0:
+                                 backend_total_rollouts = opt_rollouts + val_rollouts
+
+                         if backend_total_rollouts is not None and backend_total_rollouts > 0:
+                             summary.total_rollouts = backend_total_rollouts
+                             logger.info(
+                                 "✅ Extracted total_rollouts=%d from backend metadata stats for job %s (backend_job_id=%s)",
+                                 backend_total_rollouts,
+                                 job.job_id,
+                                 backend_job_id,
+                             )
+             except Exception as e:
+                 # Log but don't fail - backend fetch is best-effort fallback
+                 logger.debug(
+                     "Could not fetch backend stats to extract rollouts for job %s: %s",
+                     job.job_id,
+                     e,
+                 )
+
+         # Check if training actually ran - for prompt learning (GEPA/MIPRO), we expect results
+         # Note: success may have been set to False above if health check failed
+         if not error_message:  # Only check returncode if we haven't already detected a failure
+             success = completed.returncode == 0
+             if success and job.job_type == "gepa":
+                 # GEPA should produce rollouts - that's the primary indicator of success
+                 # If returncode is 0 but no rollouts were produced, it failed silently
+                 if summary.total_rollouts is None or summary.total_rollouts == 0:
+                     success = False
+                     error_message = (
+                         "Training command exited with returncode 0 but produced no rollouts. "
+                         "This indicates GEPA did not actually run. "
+                         f"Check stdout/stderr for errors. "
+                         f"Results folder: {prepared.results_folder}"
+                     )
+                     logger.error(
+                         "Job %s failed silently: %s\nStdout tail:\n%s\nStderr tail:\n%s",
+                         job.job_id,
+                         error_message,
+                         summary.stdout[-1000:] if summary.stdout else "(empty)",
+                         summary.stderr[-1000:] if summary.stderr else "(empty)",
+                     )
+                 else:
+                     # We have rollouts - that's sufficient evidence GEPA ran successfully
+                     # Learning curve and stats are nice-to-have but not required
+                     logger.info(
+                         "Job %s completed successfully with %d rollouts (best_score=%s, learning_curve_points=%d, stats=%s)",
+                         job.job_id,
+                         summary.total_rollouts,
+                         summary.best_score,
+                         len(summary.learning_curve_points),
+                         "yes" if summary.stats else "no",
+                     )
+
+         if not success and not error_message:
+             # Build detailed error message with FULL stdout/stderr
+             error_parts = [f"Training command exited with {completed.returncode}"]
+
+             # Include FULL stdout if available (for errors, we want complete context)
+             if completed.stdout:
+                 error_parts.append(f"\n\n{'='*80}\nSTDOUT (FULL, {len(completed.stdout)} chars):\n{'='*80}\n{completed.stdout}")
+             else:
+                 error_parts.append("\n\nStdout: (empty - subprocess may have crashed immediately)")
+
+             # Include FULL stderr if available
+             if completed.stderr:
+                 error_parts.append(f"\n\n{'='*80}\nSTDERR (FULL, {len(completed.stderr)} chars):\n{'='*80}\n{completed.stderr}")
+             else:
+                 error_parts.append("\n\nStderr: (empty)")
+
+             error_message = "".join(error_parts)
+
+             # Log full error (truncate only for logger, but keep full in error_message)
+             logger.error(
+                 "Job %s failed: %s\nFull stdout (%d chars):\n%s\nFull stderr (%d chars):\n%s",
+                 job.job_id,
+                 f"Training command exited with {completed.returncode}",
+                 len(completed.stdout) if completed.stdout else 0,
+                 completed.stdout if completed.stdout else "(empty)",
+                 len(completed.stderr) if completed.stderr else 0,
+                 completed.stderr if completed.stderr else "(empty)",
+             )
+     except Exception as exc:
+         error_message = str(exc)
+         summary.stderr = _truncate((summary.stderr or "") + f"\n{error_message}")
+         logger.exception("Job %s encountered error: %s", job.job_id, error_message)
+     finally:
+         if prepared:
+             prepared.cleanup()
+
+     # Prepare execution details for logging
+     command_str = " ".join(cmd) if cmd is not None and len(cmd) > 0 else None
+     working_dir = os.getcwd()
+     if env is not None:
+         python_exe = env.get("PYTHON", "python")
+         env_keys = list(env.keys())
+     else:
+         python_exe = None
+         env_keys = None
+
+     return _finalize_job(
+         job.job_id,
+         summary=summary,
+         success=success,
+         error_message=error_message,
+         command=command_str,
+         working_directory=working_dir,
+         python_executable=python_exe,
+         environment_keys=env_keys,
+     )
+
+
+ @celery_app.task(name="synth_ai.cli.local.experiment_queue.process_experiment_queue")
+ def process_experiment_queue() -> dict[str, Any]:
+     """Periodic task that checks for queued jobs and dispatches them.
+
+     This task runs every 5 seconds (via Celery Beat) to ensure queued jobs
+     are dispatched even if:
+     - Previous dispatch attempts failed
+     - Jobs were queued while other jobs were running
+     - Worker restarted and missed dispatch events
+
+     Returns a summary of dispatched jobs.
+     """
+     # Verify we're using the correct database
+     from .config import load_config
+     config = load_config()
+     env_db_path = os.getenv("EXPERIMENT_QUEUE_DB_PATH")
+     if env_db_path:
+         from pathlib import Path
+         env_db_path_resolved = Path(env_db_path).expanduser().resolve()
+         if config.sqlite_path != env_db_path_resolved:
+             logger.error(
+                 "Database path mismatch in periodic task! ENV: %s != CONFIG: %s",
+                 env_db_path_resolved,
+                 config.sqlite_path,
+             )
+
+     logger.debug("Processing experiment queue for queued jobs (database: %s)", config.sqlite_path)
+     dispatched_count = 0
+     experiments_checked = 0
+
+     with session_scope() as session:
+         # Find all running or queued experiments that might have jobs to dispatch
+         active_experiments = (
+             session.query(Experiment)
+             .filter(
+                 Experiment.status.in_([ExperimentStatus.QUEUED, ExperimentStatus.RUNNING])
+             )
+             .all()
+         )
+
+         for experiment in active_experiments:
+             experiments_checked += 1
+             # Check if there are any queued jobs without celery_task_id
+             queued_jobs = (
+                 session.query(ExperimentJob)
+                 .filter(
+                     ExperimentJob.experiment_id == experiment.experiment_id,
+                     ExperimentJob.status == ExperimentJobStatus.QUEUED,
+                     ExperimentJob.celery_task_id.is_(None),
+                 )
+                 .count()
+             )
+
+             if queued_jobs > 0:
+                 logger.debug(
+                     "Found %d queued jobs for experiment %s, attempting dispatch",
+                     queued_jobs,
+                     experiment.experiment_id,
+                 )
+                 dispatched = dispatch_available_jobs(session, experiment.experiment_id)
+                 dispatched_count += len(dispatched)
+                 if dispatched:
+                     logger.info(
+                         "Dispatched %d jobs for experiment %s",
+                         len(dispatched),
+                         experiment.experiment_id,
+                     )
+
+     result = {
+         "dispatched": dispatched_count,
+         "experiments_checked": experiments_checked,
+     }
+     logger.debug("Queue check completed: %s", result)
+     return result
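The docstring for process_experiment_queue says the sweep runs every 5 seconds via Celery Beat. A beat entry wired to the task name registered above might look roughly like the following sketch; the schedule key name and the placement next to celery_app's configuration are assumptions for illustration, not confirmed from the package source.

# Illustrative sketch only; the actual beat wiring in synth-ai may differ.
celery_app.conf.beat_schedule = {
    "process-experiment-queue": {
        "task": "synth_ai.cli.local.experiment_queue.process_experiment_queue",
        "schedule": 5.0,  # seconds between sweeps of the experiment queue
    },
}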