synth-ai 0.2.9.dev11__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (909)
  1. synth_ai/__init__.py +44 -45
  2. synth_ai/__main__.py +30 -3
  3. synth_ai/cli/__init__.py +104 -78
  4. synth_ai/cli/__main__.py +42 -0
  5. synth_ai/cli/_internal/__init__.py +5 -0
  6. synth_ai/cli/_internal/modal_wrapper.py +31 -0
  7. synth_ai/cli/_internal/storage.py +20 -0
  8. synth_ai/cli/_internal/typer_patch.py +47 -0
  9. synth_ai/cli/_internal/validate_task_app.py +29 -0
  10. synth_ai/cli/agents/__init__.py +17 -0
  11. synth_ai/cli/agents/claude.py +77 -0
  12. synth_ai/cli/agents/codex.py +265 -0
  13. synth_ai/cli/agents/opencode.py +253 -0
  14. synth_ai/cli/commands/__init__.py +18 -0
  15. synth_ai/cli/commands/artifacts/__init__.py +13 -0
  16. synth_ai/cli/commands/artifacts/client.py +119 -0
  17. synth_ai/cli/commands/artifacts/config.py +57 -0
  18. synth_ai/cli/commands/artifacts/core.py +24 -0
  19. synth_ai/cli/commands/artifacts/download.py +188 -0
  20. synth_ai/cli/commands/artifacts/export.py +186 -0
  21. synth_ai/cli/commands/artifacts/list.py +156 -0
  22. synth_ai/cli/commands/artifacts/parsing.py +250 -0
  23. synth_ai/cli/commands/artifacts/show.py +336 -0
  24. synth_ai/cli/commands/baseline/__init__.py +12 -0
  25. synth_ai/cli/commands/baseline/core.py +636 -0
  26. synth_ai/cli/commands/baseline/list.py +94 -0
  27. synth_ai/cli/commands/demo/__init__.py +3 -0
  28. synth_ai/cli/commands/demo/core.py +153 -0
  29. synth_ai/cli/commands/eval/__init__.py +19 -0
  30. synth_ai/cli/commands/eval/core.py +1113 -0
  31. synth_ai/cli/commands/eval/errors.py +81 -0
  32. synth_ai/cli/commands/eval/validation.py +133 -0
  33. synth_ai/cli/commands/filter/__init__.py +12 -0
  34. synth_ai/cli/commands/filter/core.py +424 -0
  35. synth_ai/cli/commands/filter/errors.py +55 -0
  36. synth_ai/cli/commands/filter/validation.py +77 -0
  37. synth_ai/cli/commands/help/__init__.py +185 -0
  38. synth_ai/cli/commands/help/core.py +72 -0
  39. synth_ai/cli/commands/scan/__init__.py +19 -0
  40. synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
  41. synth_ai/cli/commands/scan/core.py +344 -0
  42. synth_ai/cli/commands/scan/health_checker.py +242 -0
  43. synth_ai/cli/commands/scan/local_scanner.py +278 -0
  44. synth_ai/cli/commands/scan/models.py +83 -0
  45. synth_ai/cli/commands/smoke/__init__.py +7 -0
  46. synth_ai/cli/commands/smoke/core.py +1438 -0
  47. synth_ai/cli/commands/status/__init__.py +66 -0
  48. synth_ai/cli/commands/status/client.py +192 -0
  49. synth_ai/cli/commands/status/config.py +92 -0
  50. synth_ai/cli/commands/status/errors.py +20 -0
  51. synth_ai/cli/commands/status/formatters.py +164 -0
  52. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  53. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  54. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  55. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  56. synth_ai/cli/commands/status/subcommands/pricing.py +23 -0
  57. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  58. synth_ai/cli/commands/status/subcommands/session.py +182 -0
  59. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  60. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  61. synth_ai/cli/commands/status/utils.py +114 -0
  62. synth_ai/cli/commands/train/__init__.py +53 -0
  63. synth_ai/cli/commands/train/core.py +22 -0
  64. synth_ai/cli/commands/train/errors.py +117 -0
  65. synth_ai/cli/commands/train/judge_schemas.py +201 -0
  66. synth_ai/cli/commands/train/judge_validation.py +305 -0
  67. synth_ai/cli/commands/train/prompt_learning_validation.py +633 -0
  68. synth_ai/cli/commands/train/validation.py +392 -0
  69. synth_ai/cli/demo_apps/__init__.py +10 -0
  70. synth_ai/cli/demo_apps/core/__init__.py +28 -0
  71. synth_ai/cli/demo_apps/core/cli.py +1735 -0
  72. synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
  73. synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
  74. synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
  75. synth_ai/cli/demo_apps/demo_registry.py +176 -0
  76. synth_ai/cli/demo_apps/demo_task_apps/core.py +440 -0
  77. synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
  78. synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
  79. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +742 -0
  80. synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
  81. synth_ai/cli/demo_apps/math/__init__.py +1 -0
  82. synth_ai/cli/demo_apps/math/_common.py +16 -0
  83. synth_ai/cli/demo_apps/math/app.py +38 -0
  84. synth_ai/cli/demo_apps/math/config.toml +76 -0
  85. synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
  86. synth_ai/cli/demo_apps/math/modal_task_app.py +702 -0
  87. synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
  88. synth_ai/cli/demo_apps/mipro/main.py +271 -0
  89. synth_ai/cli/demo_apps/mipro/task_app.py +933 -0
  90. synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
  91. synth_ai/cli/demos/__init__.py +12 -0
  92. synth_ai/cli/demos/demo.py +32 -0
  93. synth_ai/cli/demos/rl_demo.py +254 -0
  94. synth_ai/cli/deploy.py +216 -0
  95. synth_ai/cli/infra/__init__.py +14 -0
  96. synth_ai/cli/infra/balance.py +216 -0
  97. synth_ai/cli/infra/mcp.py +35 -0
  98. synth_ai/cli/infra/modal_app.py +36 -0
  99. synth_ai/cli/infra/setup.py +69 -0
  100. synth_ai/cli/infra/status.py +16 -0
  101. synth_ai/cli/infra/turso.py +77 -0
  102. synth_ai/cli/lib/__init__.py +10 -0
  103. synth_ai/cli/lib/agents.py +76 -0
  104. synth_ai/cli/lib/apps/modal_app.py +101 -0
  105. synth_ai/cli/lib/apps/task_app.py +643 -0
  106. synth_ai/cli/lib/bin.py +39 -0
  107. synth_ai/cli/lib/env.py +375 -0
  108. synth_ai/cli/lib/errors.py +85 -0
  109. synth_ai/cli/lib/modal.py +315 -0
  110. synth_ai/cli/lib/plotting.py +126 -0
  111. synth_ai/cli/lib/prompt_args.py +39 -0
  112. synth_ai/cli/lib/prompts.py +284 -0
  113. synth_ai/cli/lib/sqld.py +122 -0
  114. synth_ai/cli/lib/task_app_discovery.py +884 -0
  115. synth_ai/cli/lib/task_app_env.py +295 -0
  116. synth_ai/cli/lib/train_cfgs.py +300 -0
  117. synth_ai/cli/lib/tunnel_records.py +207 -0
  118. synth_ai/cli/local/__init__.py +14 -0
  119. synth_ai/cli/local/experiment_queue/__init__.py +72 -0
  120. synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
  121. synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
  122. synth_ai/cli/local/experiment_queue/config.py +128 -0
  123. synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
  124. synth_ai/cli/local/experiment_queue/database.py +175 -0
  125. synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
  126. synth_ai/cli/local/experiment_queue/models.py +231 -0
  127. synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
  128. synth_ai/cli/local/experiment_queue/results.py +373 -0
  129. synth_ai/cli/local/experiment_queue/schemas.py +131 -0
  130. synth_ai/cli/local/experiment_queue/service.py +344 -0
  131. synth_ai/cli/local/experiment_queue/status.py +372 -0
  132. synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
  133. synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
  134. synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
  135. synth_ai/cli/local/experiment_queue/validation.py +157 -0
  136. synth_ai/cli/local/session/__init__.py +92 -0
  137. synth_ai/cli/local/session/client.py +383 -0
  138. synth_ai/cli/local/session/constants.py +63 -0
  139. synth_ai/cli/local/session/exceptions.py +105 -0
  140. synth_ai/cli/local/session/manager.py +139 -0
  141. synth_ai/cli/local/session/models.py +89 -0
  142. synth_ai/cli/local/session/query.py +110 -0
  143. synth_ai/cli/root.py +30 -103
  144. synth_ai/cli/task_apps/__init__.py +26 -0
  145. synth_ai/cli/task_apps/commands.py +3153 -0
  146. synth_ai/cli/task_apps/deploy.py +7 -0
  147. synth_ai/cli/task_apps/list.py +26 -0
  148. synth_ai/cli/task_apps/main.py +36 -0
  149. synth_ai/cli/task_apps/modal_serve.py +11 -0
  150. synth_ai/cli/task_apps/serve.py +11 -0
  151. synth_ai/cli/training/__init__.py +8 -0
  152. synth_ai/cli/training/train.py +5 -0
  153. synth_ai/cli/training/train_cfg.py +34 -0
  154. synth_ai/cli/training/watch.py +506 -0
  155. synth_ai/cli/turso.py +34 -55
  156. synth_ai/cli/usage.py +159 -0
  157. synth_ai/cli/utils/__init__.py +8 -0
  158. synth_ai/cli/utils/experiments.py +235 -0
  159. synth_ai/cli/utils/queue.py +504 -0
  160. synth_ai/cli/utils/recent.py +133 -0
  161. synth_ai/cli/utils/traces.py +164 -0
  162. synth_ai/contracts/__init__.py +67 -0
  163. synth_ai/core/__init__.py +100 -0
  164. synth_ai/core/_utils/__init__.py +54 -0
  165. synth_ai/core/_utils/base_url.py +10 -0
  166. synth_ai/core/_utils/http.py +10 -0
  167. synth_ai/core/_utils/prompts.py +14 -0
  168. synth_ai/core/_utils/task_app_state.py +12 -0
  169. synth_ai/core/_utils/user_config.py +10 -0
  170. synth_ai/core/apps/common.py +116 -0
  171. synth_ai/core/auth.py +95 -0
  172. synth_ai/core/cfgs.py +240 -0
  173. synth_ai/core/config/__init__.py +16 -0
  174. synth_ai/core/config/base.py +168 -0
  175. synth_ai/core/config/resolver.py +89 -0
  176. synth_ai/core/env.py +220 -0
  177. synth_ai/core/errors.py +126 -0
  178. synth_ai/core/http.py +230 -0
  179. synth_ai/core/integrations/__init__.py +11 -0
  180. synth_ai/core/integrations/cloudflare.py +1710 -0
  181. synth_ai/core/integrations/mcp/__init__.py +6 -0
  182. synth_ai/core/integrations/mcp/__main__.py +8 -0
  183. synth_ai/core/integrations/mcp/claude.py +36 -0
  184. synth_ai/core/integrations/mcp/main.py +254 -0
  185. synth_ai/core/integrations/mcp/setup.py +100 -0
  186. synth_ai/core/integrations/modal.py +277 -0
  187. synth_ai/core/json.py +72 -0
  188. synth_ai/core/log_filter.py +99 -0
  189. synth_ai/core/logging.py +82 -0
  190. synth_ai/core/paths.py +107 -0
  191. synth_ai/core/pricing.py +109 -0
  192. synth_ai/core/process.py +233 -0
  193. synth_ai/core/ssl.py +25 -0
  194. synth_ai/core/storage/__init__.py +71 -0
  195. synth_ai/core/task_app_state.py +318 -0
  196. synth_ai/core/telemetry.py +282 -0
  197. synth_ai/core/tracing_v3/__init__.py +99 -0
  198. synth_ai/core/tracing_v3/config.py +229 -0
  199. synth_ai/core/tracing_v3/constants.py +21 -0
  200. synth_ai/core/tracing_v3/db_config.py +182 -0
  201. synth_ai/core/tracing_v3/decorators.py +401 -0
  202. synth_ai/core/tracing_v3/examples/basic_usage.py +194 -0
  203. synth_ai/core/tracing_v3/llm_call_record_helpers.py +437 -0
  204. synth_ai/core/tracing_v3/migration_helper.py +119 -0
  205. synth_ai/core/tracing_v3/replica_sync.py +262 -0
  206. synth_ai/core/tracing_v3/serialization.py +130 -0
  207. synth_ai/core/tracing_v3/session_tracer.py +542 -0
  208. synth_ai/core/tracing_v3/storage/base.py +211 -0
  209. synth_ai/core/tracing_v3/storage/config.py +109 -0
  210. synth_ai/core/tracing_v3/storage/factory.py +39 -0
  211. synth_ai/core/tracing_v3/storage/utils.py +206 -0
  212. synth_ai/core/tracing_v3/trace_utils.py +326 -0
  213. synth_ai/core/tracing_v3/turso/__init__.py +12 -0
  214. synth_ai/core/tracing_v3/turso/daemon.py +278 -0
  215. synth_ai/core/tracing_v3/turso/models.py +470 -0
  216. synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
  217. synth_ai/core/tracing_v3/utils.py +108 -0
  218. synth_ai/core/urls.py +18 -0
  219. synth_ai/core/user_config.py +137 -0
  220. synth_ai/core/uvicorn.py +222 -0
  221. synth_ai/data/__init__.py +110 -0
  222. synth_ai/data/enums.py +141 -0
  223. synth_ai/data/rewards.py +152 -0
  224. synth_ai/data/specs.py +36 -0
  225. synth_ai/data/traces.py +35 -0
  226. synth_ai/products/__init__.py +6 -0
  227. synth_ai/products/graph_evolve/__init__.py +46 -0
  228. synth_ai/products/graph_evolve/client.py +226 -0
  229. synth_ai/products/graph_evolve/config.py +591 -0
  230. synth_ai/products/graph_evolve/converters/__init__.py +42 -0
  231. synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
  232. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
  233. synth_ai/products/graph_evolve/run.py +222 -0
  234. synth_ai/sdk/__init__.py +119 -0
  235. synth_ai/sdk/api/__init__.py +1 -0
  236. synth_ai/sdk/api/models/supported.py +514 -0
  237. synth_ai/sdk/api/research_agent/__init__.py +86 -0
  238. synth_ai/sdk/api/research_agent/cli.py +428 -0
  239. synth_ai/sdk/api/research_agent/config.py +357 -0
  240. synth_ai/sdk/api/research_agent/job.py +717 -0
  241. synth_ai/sdk/api/train/__init__.py +85 -0
  242. synth_ai/sdk/api/train/builders.py +895 -0
  243. synth_ai/sdk/api/train/cli.py +2188 -0
  244. synth_ai/sdk/api/train/config_finder.py +267 -0
  245. synth_ai/sdk/api/train/configs/__init__.py +65 -0
  246. synth_ai/sdk/api/train/configs/prompt_learning.py +1706 -0
  247. synth_ai/sdk/api/train/configs/rl.py +188 -0
  248. synth_ai/sdk/api/train/configs/sft.py +99 -0
  249. synth_ai/sdk/api/train/configs/shared.py +81 -0
  250. synth_ai/sdk/api/train/context_learning.py +312 -0
  251. synth_ai/sdk/api/train/env_resolver.py +418 -0
  252. synth_ai/sdk/api/train/graph_validators.py +216 -0
  253. synth_ai/sdk/api/train/graphgen.py +984 -0
  254. synth_ai/sdk/api/train/graphgen_models.py +823 -0
  255. synth_ai/sdk/api/train/graphgen_validators.py +109 -0
  256. synth_ai/sdk/api/train/pollers.py +124 -0
  257. synth_ai/sdk/api/train/progress/__init__.py +97 -0
  258. synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
  259. synth_ai/sdk/api/train/progress/events.py +326 -0
  260. synth_ai/sdk/api/train/progress/results.py +428 -0
  261. synth_ai/sdk/api/train/progress/tracker.py +641 -0
  262. synth_ai/sdk/api/train/prompt_learning.py +470 -0
  263. synth_ai/sdk/api/train/rl.py +442 -0
  264. synth_ai/sdk/api/train/sft.py +396 -0
  265. synth_ai/sdk/api/train/summary.py +522 -0
  266. synth_ai/sdk/api/train/supported_algos.py +147 -0
  267. synth_ai/sdk/api/train/task_app.py +331 -0
  268. synth_ai/sdk/api/train/utils.py +279 -0
  269. synth_ai/sdk/api/train/validators.py +2424 -0
  270. synth_ai/sdk/baseline/__init__.py +25 -0
  271. synth_ai/sdk/baseline/config.py +209 -0
  272. synth_ai/sdk/baseline/discovery.py +216 -0
  273. synth_ai/sdk/baseline/execution.py +154 -0
  274. synth_ai/sdk/graphs/__init__.py +15 -0
  275. synth_ai/sdk/graphs/completions.py +570 -0
  276. synth_ai/sdk/inference/__init__.py +6 -0
  277. synth_ai/sdk/inference/client.py +128 -0
  278. synth_ai/sdk/jobs/__init__.py +16 -0
  279. synth_ai/sdk/jobs/client.py +371 -0
  280. synth_ai/sdk/judging/__init__.py +15 -0
  281. synth_ai/sdk/judging/base.py +24 -0
  282. synth_ai/sdk/judging/client.py +191 -0
  283. synth_ai/sdk/judging/schemas.py +222 -0
  284. synth_ai/sdk/judging/types.py +42 -0
  285. synth_ai/sdk/learning/__init__.py +69 -0
  286. synth_ai/sdk/learning/client.py +240 -0
  287. synth_ai/sdk/learning/ft_client.py +7 -0
  288. synth_ai/sdk/learning/health.py +49 -0
  289. synth_ai/sdk/learning/jobs.py +202 -0
  290. synth_ai/sdk/learning/prompt_extraction.py +334 -0
  291. synth_ai/sdk/learning/prompt_learning_client.py +455 -0
  292. synth_ai/sdk/learning/prompt_learning_types.py +185 -0
  293. synth_ai/sdk/learning/rl/client.py +268 -0
  294. synth_ai/sdk/learning/rl/contracts.py +27 -0
  295. synth_ai/sdk/learning/rl/env_keys.py +166 -0
  296. synth_ai/sdk/learning/rl/secrets.py +13 -0
  297. synth_ai/sdk/learning/sft/client.py +95 -0
  298. synth_ai/sdk/learning/sft/config.py +270 -0
  299. synth_ai/sdk/learning/sft/data.py +698 -0
  300. synth_ai/sdk/learning/validators.py +52 -0
  301. synth_ai/sdk/research_agent/__init__.py +34 -0
  302. synth_ai/sdk/research_agent/container_builder.py +328 -0
  303. synth_ai/sdk/research_agent/container_spec.py +198 -0
  304. synth_ai/sdk/research_agent/defaults.py +34 -0
  305. synth_ai/sdk/research_agent/results_collector.py +69 -0
  306. synth_ai/sdk/specs/__init__.py +46 -0
  307. synth_ai/sdk/specs/dataclasses.py +149 -0
  308. synth_ai/sdk/specs/loader.py +144 -0
  309. synth_ai/sdk/specs/serializer.py +199 -0
  310. synth_ai/sdk/specs/validation.py +250 -0
  311. synth_ai/sdk/streaming/__init__.py +35 -0
  312. synth_ai/sdk/streaming/config.py +94 -0
  313. synth_ai/sdk/streaming/handlers.py +1997 -0
  314. synth_ai/sdk/streaming/streamer.py +704 -0
  315. synth_ai/sdk/streaming/types.py +112 -0
  316. synth_ai/sdk/task/__init__.py +151 -0
  317. synth_ai/sdk/task/apps/__init__.py +133 -0
  318. synth_ai/sdk/task/config.py +261 -0
  319. synth_ai/sdk/task/contracts.py +298 -0
  320. synth_ai/sdk/task/datasets.py +108 -0
  321. synth_ai/sdk/task/in_process.py +1190 -0
  322. synth_ai/sdk/task/in_process_runner.py +309 -0
  323. synth_ai/sdk/task/inference_api.py +299 -0
  324. synth_ai/sdk/task/proxy.py +287 -0
  325. synth_ai/sdk/task/rubrics/__init__.py +55 -0
  326. synth_ai/sdk/task/rubrics/loaders.py +156 -0
  327. synth_ai/sdk/task/rubrics/models.py +57 -0
  328. synth_ai/sdk/task/rubrics/scoring.py +116 -0
  329. synth_ai/sdk/task/rubrics/strict.py +149 -0
  330. synth_ai/sdk/task/server.py +580 -0
  331. synth_ai/sdk/task/trace_correlation_helpers.py +506 -0
  332. synth_ai/sdk/task/tracing_utils.py +95 -0
  333. synth_ai/sdk/task/validators.py +456 -0
  334. synth_ai/sdk/tracing/__init__.py +39 -0
  335. synth_ai/sdk/training/__init__.py +102 -0
  336. synth_ai/sdk/usage/__init__.py +37 -0
  337. synth_ai/sdk/usage/client.py +171 -0
  338. synth_ai/sdk/usage/models.py +261 -0
  339. synth_ai/utils/__init__.py +213 -0
  340. synth_ai-0.4.1.dist-info/METADATA +195 -0
  341. synth_ai-0.4.1.dist-info/RECORD +379 -0
  342. synth_ai-0.4.1.dist-info/entry_points.txt +2 -0
  343. synth_ai-0.4.1.dist-info/top_level.txt +1 -0
  344. examples/__init__.py +0 -16
  345. examples/analyze_semantic_words.sh +0 -17
  346. examples/crafter_debug_render.py +0 -186
  347. examples/qwen_coder/README.md +0 -102
  348. examples/qwen_coder/_shared.py +0 -113
  349. examples/qwen_coder/configs/coder_lora_30b.toml +0 -61
  350. examples/qwen_coder/configs/coder_lora_4b.toml +0 -57
  351. examples/qwen_coder/configs/coder_lora_small.toml +0 -58
  352. examples/qwen_coder/generate_dataset.py +0 -98
  353. examples/qwen_coder/infer_ft_smoke.py +0 -64
  354. examples/qwen_coder/infer_prod_proxy.py +0 -73
  355. examples/qwen_coder/infer_via_synth.py +0 -87
  356. examples/qwen_coder/scripts/infer_coder.sh +0 -18
  357. examples/qwen_coder/scripts/train_coder_30b.sh +0 -21
  358. examples/qwen_coder/sft_full_17b.py +0 -103
  359. examples/qwen_coder/sft_lora_30b.py +0 -110
  360. examples/qwen_coder/subset_jsonl.py +0 -38
  361. examples/qwen_coder/validate_jsonl.py +0 -59
  362. examples/rl/README.md +0 -169
  363. examples/rl/configs/eval_base_qwen.toml +0 -15
  364. examples/rl/configs/eval_rl_qwen.toml +0 -11
  365. examples/rl/configs/rl_from_base_qwen.toml +0 -35
  366. examples/rl/configs/rl_from_base_qwen17.toml +0 -74
  367. examples/rl/configs/rl_from_ft_qwen.toml +0 -35
  368. examples/rl/download_dataset.py +0 -80
  369. examples/rl/run_eval.py +0 -436
  370. examples/rl/run_rl_and_save.py +0 -111
  371. examples/rl/task_app/README.md +0 -22
  372. examples/rl/task_app/math_single_step.py +0 -991
  373. examples/rl/task_app/math_task_app.py +0 -115
  374. examples/run_crafter_demo.sh +0 -10
  375. examples/sft/README.md +0 -139
  376. examples/sft/configs/crafter_fft_qwen0p6b.toml +0 -44
  377. examples/sft/configs/crafter_lora_qwen0p6b.toml +0 -45
  378. examples/sft/evaluate.py +0 -117
  379. examples/sft/export_dataset.py +0 -117
  380. examples/sft/generate_traces.py +0 -162
  381. examples/swe/__init__.py +0 -12
  382. examples/swe/task_app/README.md +0 -105
  383. examples/swe/task_app/__init__.py +0 -2
  384. examples/swe/task_app/grpo_swe_mini.py +0 -571
  385. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -136
  386. examples/swe/task_app/hosted/README.md +0 -173
  387. examples/swe/task_app/hosted/__init__.py +0 -5
  388. examples/swe/task_app/hosted/branching.py +0 -143
  389. examples/swe/task_app/hosted/environment_routes.py +0 -1289
  390. examples/swe/task_app/hosted/envs/__init__.py +0 -1
  391. examples/swe/task_app/hosted/envs/crafter/__init__.py +0 -6
  392. examples/swe/task_app/hosted/envs/crafter/app.py +0 -1
  393. examples/swe/task_app/hosted/envs/crafter/environment.py +0 -522
  394. examples/swe/task_app/hosted/envs/crafter/policy.py +0 -478
  395. examples/swe/task_app/hosted/envs/crafter/react_agent.py +0 -108
  396. examples/swe/task_app/hosted/envs/crafter/shared.py +0 -305
  397. examples/swe/task_app/hosted/envs/crafter/tools.py +0 -47
  398. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +0 -8
  399. examples/swe/task_app/hosted/envs/mini_swe/environment.py +0 -1164
  400. examples/swe/task_app/hosted/envs/mini_swe/policy.py +0 -355
  401. examples/swe/task_app/hosted/envs/mini_swe/shared.py +0 -83
  402. examples/swe/task_app/hosted/envs/mini_swe/tools.py +0 -96
  403. examples/swe/task_app/hosted/hosted_app.py +0 -204
  404. examples/swe/task_app/hosted/inference/__init__.py +0 -5
  405. examples/swe/task_app/hosted/inference/openai_client.py +0 -618
  406. examples/swe/task_app/hosted/main.py +0 -100
  407. examples/swe/task_app/hosted/policy_routes.py +0 -1079
  408. examples/swe/task_app/hosted/registry.py +0 -195
  409. examples/swe/task_app/hosted/rollout.py +0 -1869
  410. examples/swe/task_app/hosted/storage/__init__.py +0 -5
  411. examples/swe/task_app/hosted/storage/volume.py +0 -211
  412. examples/swe/task_app/hosted/test_agents.py +0 -161
  413. examples/swe/task_app/hosted/test_service.py +0 -137
  414. examples/swe/task_app/hosted/utils.py +0 -62
  415. examples/vlm/README.md +0 -68
  416. examples/vlm/configs/crafter_vlm_gpt4o.toml +0 -44
  417. examples/vlm/crafter_image_only_agent.py +0 -207
  418. examples/vlm/crafter_openai_vlm_agent.py +0 -277
  419. examples/vlm/filter_image_rows.py +0 -63
  420. examples/vlm/run_crafter_vlm_benchmark.py +0 -316
  421. examples/warming_up_to_rl/analyze_trace_db.py +0 -422
  422. examples/warming_up_to_rl/configs/crafter_fft.toml +0 -48
  423. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -54
  424. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +0 -20
  425. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +0 -13
  426. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +0 -23
  427. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +0 -83
  428. examples/warming_up_to_rl/configs/rl_from_ft.toml +0 -56
  429. examples/warming_up_to_rl/export_trace_sft.py +0 -723
  430. examples/warming_up_to_rl/groq_test.py +0 -95
  431. examples/warming_up_to_rl/manage_secrets.py +0 -131
  432. examples/warming_up_to_rl/readme.md +0 -179
  433. examples/warming_up_to_rl/run_eval.py +0 -510
  434. examples/warming_up_to_rl/run_fft_and_save.py +0 -380
  435. examples/warming_up_to_rl/run_local_rollout.py +0 -237
  436. examples/warming_up_to_rl/run_local_rollout_modal.py +0 -246
  437. examples/warming_up_to_rl/run_local_rollout_parallel.py +0 -403
  438. examples/warming_up_to_rl/run_local_rollout_traced.py +0 -475
  439. examples/warming_up_to_rl/run_rl_and_save.py +0 -124
  440. examples/warming_up_to_rl/run_rollout_remote.py +0 -154
  441. examples/warming_up_to_rl/task_app/README.md +0 -42
  442. examples/warming_up_to_rl/task_app/grpo_crafter.py +0 -700
  443. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +0 -146
  444. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +0 -173
  445. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +0 -5
  446. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +0 -143
  447. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +0 -1226
  448. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +0 -1
  449. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -6
  450. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +0 -1
  451. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -522
  452. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +0 -478
  453. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -108
  454. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -305
  455. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -47
  456. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +0 -204
  457. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +0 -5
  458. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +0 -618
  459. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -100
  460. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +0 -1083
  461. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +0 -195
  462. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +0 -1869
  463. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +0 -5
  464. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +0 -211
  465. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +0 -161
  466. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +0 -137
  467. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +0 -62
  468. synth/__init__.py +0 -14
  469. synth_ai/api/models/supported.py +0 -376
  470. synth_ai/api/train/__init__.py +0 -5
  471. synth_ai/api/train/builders.py +0 -296
  472. synth_ai/api/train/cli.py +0 -606
  473. synth_ai/api/train/config_finder.py +0 -228
  474. synth_ai/api/train/env_resolver.py +0 -347
  475. synth_ai/api/train/pollers.py +0 -75
  476. synth_ai/api/train/supported_algos.py +0 -139
  477. synth_ai/api/train/task_app.py +0 -195
  478. synth_ai/api/train/utils.py +0 -217
  479. synth_ai/cli/_modal_wrapper.py +0 -28
  480. synth_ai/cli/_typer_patch.py +0 -49
  481. synth_ai/cli/balance.py +0 -203
  482. synth_ai/cli/calc.py +0 -69
  483. synth_ai/cli/demo.py +0 -159
  484. synth_ai/cli/legacy_root_backup.py +0 -470
  485. synth_ai/cli/man.py +0 -106
  486. synth_ai/cli/recent.py +0 -127
  487. synth_ai/cli/rl_demo.py +0 -274
  488. synth_ai/cli/status.py +0 -133
  489. synth_ai/cli/task_apps.py +0 -2782
  490. synth_ai/cli/traces.py +0 -163
  491. synth_ai/cli/watch.py +0 -505
  492. synth_ai/config/base_url.py +0 -107
  493. synth_ai/core/experiment.py +0 -13
  494. synth_ai/core/system.py +0 -15
  495. synth_ai/demo_registry.py +0 -295
  496. synth_ai/demos/core/__init__.py +0 -1
  497. synth_ai/demos/core/cli.py +0 -1756
  498. synth_ai/demos/demo_task_apps/core.py +0 -440
  499. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +0 -172
  500. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
  501. synth_ai/demos/demo_task_apps/math/modal_task_app.py +0 -739
  502. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -37
  503. synth_ai/environments/__init__.py +0 -31
  504. synth_ai/environments/environment/__init__.py +0 -1
  505. synth_ai/environments/environment/artifacts/__init__.py +0 -1
  506. synth_ai/environments/environment/artifacts/base.py +0 -52
  507. synth_ai/environments/environment/core.py +0 -67
  508. synth_ai/environments/environment/db/__init__.py +0 -1
  509. synth_ai/environments/environment/db/sqlite.py +0 -45
  510. synth_ai/environments/environment/registry.py +0 -233
  511. synth_ai/environments/environment/resources/sqlite.py +0 -45
  512. synth_ai/environments/environment/results.py +0 -1
  513. synth_ai/environments/environment/rewards/__init__.py +0 -1
  514. synth_ai/environments/environment/rewards/core.py +0 -29
  515. synth_ai/environments/environment/shared_engine.py +0 -26
  516. synth_ai/environments/environment/tools/__init__.py +0 -200
  517. synth_ai/environments/examples/__init__.py +0 -1
  518. synth_ai/environments/examples/bandit/__init__.py +0 -33
  519. synth_ai/environments/examples/bandit/engine.py +0 -302
  520. synth_ai/environments/examples/bandit/environment.py +0 -194
  521. synth_ai/environments/examples/bandit/taskset.py +0 -200
  522. synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
  523. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
  524. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
  525. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
  526. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
  527. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
  528. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
  529. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
  530. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
  531. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
  532. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
  533. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
  534. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
  535. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
  536. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
  537. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
  538. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
  539. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
  540. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
  541. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
  542. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
  543. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
  544. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
  545. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
  546. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
  547. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
  548. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
  549. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
  550. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
  551. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
  552. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
  553. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
  554. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
  555. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
  556. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
  557. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
  558. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
  559. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
  560. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
  561. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
  562. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
  563. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
  564. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
  565. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
  566. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
  567. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
  568. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
  569. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
  570. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
  571. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
  572. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
  573. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
  574. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
  575. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
  576. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
  577. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
  578. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
  579. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
  580. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
  581. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
  582. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
  583. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
  584. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
  585. synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
  586. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  587. synth_ai/environments/examples/crafter_classic/engine.py +0 -579
  588. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
  589. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
  590. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
  591. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
  592. synth_ai/environments/examples/crafter_classic/environment.py +0 -479
  593. synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
  594. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
  595. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
  596. synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
  597. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
  598. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
  599. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
  600. synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
  601. synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
  602. synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
  603. synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
  604. synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
  605. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
  606. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
  607. synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
  608. synth_ai/environments/examples/crafter_custom/environment.py +0 -312
  609. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
  610. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
  611. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
  612. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
  613. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
  614. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
  615. synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
  616. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
  617. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
  618. synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
  619. synth_ai/environments/examples/enron/engine.py +0 -295
  620. synth_ai/environments/examples/enron/environment.py +0 -166
  621. synth_ai/environments/examples/enron/taskset.py +0 -112
  622. synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
  623. synth_ai/environments/examples/minigrid/__init__.py +0 -48
  624. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
  625. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
  626. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
  627. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
  628. synth_ai/environments/examples/minigrid/engine.py +0 -589
  629. synth_ai/environments/examples/minigrid/environment.py +0 -274
  630. synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
  631. synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
  632. synth_ai/environments/examples/minigrid/taskset.py +0 -583
  633. synth_ai/environments/examples/nethack/__init__.py +0 -7
  634. synth_ai/environments/examples/nethack/achievements.py +0 -337
  635. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
  636. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
  637. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
  638. synth_ai/environments/examples/nethack/engine.py +0 -739
  639. synth_ai/environments/examples/nethack/environment.py +0 -256
  640. synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
  641. synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
  642. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
  643. synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
  644. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
  645. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
  646. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
  647. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
  648. synth_ai/environments/examples/nethack/taskset.py +0 -323
  649. synth_ai/environments/examples/red/__init__.py +0 -7
  650. synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
  651. synth_ai/environments/examples/red/config_logging.py +0 -110
  652. synth_ai/environments/examples/red/engine.py +0 -694
  653. synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
  654. synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -28
  655. synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
  656. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
  657. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
  658. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
  659. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
  660. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
  661. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
  662. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
  663. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
  664. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
  665. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
  666. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
  667. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
  668. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
  669. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -140
  670. synth_ai/environments/examples/red/environment.py +0 -238
  671. synth_ai/environments/examples/red/taskset.py +0 -79
  672. synth_ai/environments/examples/red/units/__init__.py +0 -1
  673. synth_ai/environments/examples/sokoban/__init__.py +0 -1
  674. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
  675. synth_ai/environments/examples/sokoban/engine.py +0 -678
  676. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
  677. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
  678. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
  679. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
  680. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
  681. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
  682. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
  683. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
  684. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
  685. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
  686. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
  687. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
  688. synth_ai/environments/examples/sokoban/environment.py +0 -229
  689. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
  690. synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
  691. synth_ai/environments/examples/sokoban/taskset.py +0 -428
  692. synth_ai/environments/examples/tictactoe/__init__.py +0 -1
  693. synth_ai/environments/examples/tictactoe/engine.py +0 -368
  694. synth_ai/environments/examples/tictactoe/environment.py +0 -240
  695. synth_ai/environments/examples/tictactoe/taskset.py +0 -215
  696. synth_ai/environments/examples/verilog/__init__.py +0 -10
  697. synth_ai/environments/examples/verilog/engine.py +0 -329
  698. synth_ai/environments/examples/verilog/environment.py +0 -350
  699. synth_ai/environments/examples/verilog/taskset.py +0 -420
  700. synth_ai/environments/examples/wordle/__init__.py +0 -29
  701. synth_ai/environments/examples/wordle/engine.py +0 -398
  702. synth_ai/environments/examples/wordle/environment.py +0 -159
  703. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
  704. synth_ai/environments/examples/wordle/taskset.py +0 -230
  705. synth_ai/environments/reproducibility/core.py +0 -42
  706. synth_ai/environments/reproducibility/helpers.py +0 -0
  707. synth_ai/environments/reproducibility/tree.py +0 -363
  708. synth_ai/environments/service/app.py +0 -97
  709. synth_ai/environments/service/core_routes.py +0 -1021
  710. synth_ai/environments/service/external_registry.py +0 -56
  711. synth_ai/environments/service/registry.py +0 -9
  712. synth_ai/environments/stateful/__init__.py +0 -1
  713. synth_ai/environments/stateful/core.py +0 -163
  714. synth_ai/environments/stateful/engine.py +0 -21
  715. synth_ai/environments/stateful/state.py +0 -7
  716. synth_ai/environments/tasks/api.py +0 -19
  717. synth_ai/environments/tasks/core.py +0 -81
  718. synth_ai/environments/tasks/filters.py +0 -40
  719. synth_ai/environments/tasks/utils.py +0 -90
  720. synth_ai/environments/v0_observability/history.py +0 -3
  721. synth_ai/environments/v0_observability/log.py +0 -2
  722. synth_ai/evals/base.py +0 -13
  723. synth_ai/handshake.py +0 -109
  724. synth_ai/http.py +0 -26
  725. synth_ai/http_client.py +0 -136
  726. synth_ai/inference/__init__.py +0 -5
  727. synth_ai/inference/client.py +0 -34
  728. synth_ai/jobs/client.py +0 -271
  729. synth_ai/learning/__init__.py +0 -59
  730. synth_ai/learning/client.py +0 -241
  731. synth_ai/learning/ft_client.py +0 -7
  732. synth_ai/learning/health.py +0 -49
  733. synth_ai/learning/jobs.py +0 -201
  734. synth_ai/learning/rl/client.py +0 -267
  735. synth_ai/learning/rl/contracts.py +0 -27
  736. synth_ai/learning/rl/env_keys.py +0 -166
  737. synth_ai/learning/rl/secrets.py +0 -13
  738. synth_ai/learning/sft/client.py +0 -68
  739. synth_ai/learning/sft/config.py +0 -270
  740. synth_ai/learning/sft/data.py +0 -295
  741. synth_ai/learning/validators.py +0 -49
  742. synth_ai/lm/__init__.py +0 -25
  743. synth_ai/main.py +0 -6
  744. synth_ai/task/__init__.py +0 -102
  745. synth_ai/task/apps/__init__.py +0 -128
  746. synth_ai/task/contracts.py +0 -137
  747. synth_ai/task/datasets.py +0 -108
  748. synth_ai/task/proxy.py +0 -259
  749. synth_ai/task/server.py +0 -424
  750. synth_ai/task/tracing_utils.py +0 -84
  751. synth_ai/task/validators.py +0 -11
  752. synth_ai/tracing_v3/__init__.py +0 -97
  753. synth_ai/tracing_v3/config.py +0 -84
  754. synth_ai/tracing_v3/db_config.py +0 -194
  755. synth_ai/tracing_v3/decorators.py +0 -369
  756. synth_ai/tracing_v3/examples/basic_usage.py +0 -189
  757. synth_ai/tracing_v3/llm_call_record_helpers.py +0 -337
  758. synth_ai/tracing_v3/migration_helper.py +0 -120
  759. synth_ai/tracing_v3/replica_sync.py +0 -258
  760. synth_ai/tracing_v3/session_tracer.py +0 -530
  761. synth_ai/tracing_v3/storage/base.py +0 -210
  762. synth_ai/tracing_v3/storage/config.py +0 -75
  763. synth_ai/tracing_v3/storage/factory.py +0 -39
  764. synth_ai/tracing_v3/storage/utils.py +0 -204
  765. synth_ai/tracing_v3/turso/daemon.py +0 -149
  766. synth_ai/tracing_v3/turso/models.py +0 -469
  767. synth_ai/tracing_v3/turso/native_manager.py +0 -1173
  768. synth_ai/tracing_v3/utils.py +0 -108
  769. synth_ai/v0/api/__init__.py +0 -8
  770. synth_ai/v0/api/models/__init__.py +0 -8
  771. synth_ai/v0/api/models/supported.py +0 -8
  772. synth_ai/v0/config/__init__.py +0 -15
  773. synth_ai/v0/config/base_url.py +0 -12
  774. synth_ai/v0/lm/__init__.py +0 -51
  775. synth_ai/v0/lm/caching/constants.py +0 -6
  776. synth_ai/v0/lm/caching/dbs.py +0 -0
  777. synth_ai/v0/lm/caching/ephemeral.py +0 -100
  778. synth_ai/v0/lm/caching/handler.py +0 -137
  779. synth_ai/v0/lm/caching/initialize.py +0 -11
  780. synth_ai/v0/lm/caching/persistent.py +0 -114
  781. synth_ai/v0/lm/config.py +0 -115
  782. synth_ai/v0/lm/constants.py +0 -32
  783. synth_ai/v0/lm/core/__init__.py +0 -8
  784. synth_ai/v0/lm/core/all.py +0 -73
  785. synth_ai/v0/lm/core/exceptions.py +0 -5
  786. synth_ai/v0/lm/core/main.py +0 -331
  787. synth_ai/v0/lm/core/main_v3.py +0 -594
  788. synth_ai/v0/lm/core/synth_models.py +0 -35
  789. synth_ai/v0/lm/core/vendor_clients.py +0 -190
  790. synth_ai/v0/lm/cost/__init__.py +0 -0
  791. synth_ai/v0/lm/cost/monitor.py +0 -1
  792. synth_ai/v0/lm/cost/statefulness.py +0 -1
  793. synth_ai/v0/lm/injection.py +0 -80
  794. synth_ai/v0/lm/overrides.py +0 -206
  795. synth_ai/v0/lm/provider_support/__init__.py +0 -8
  796. synth_ai/v0/lm/provider_support/anthropic.py +0 -972
  797. synth_ai/v0/lm/provider_support/openai.py +0 -1139
  798. synth_ai/v0/lm/provider_support/suppress_logging.py +0 -31
  799. synth_ai/v0/lm/structured_outputs/__init__.py +0 -0
  800. synth_ai/v0/lm/structured_outputs/handler.py +0 -440
  801. synth_ai/v0/lm/structured_outputs/inject.py +0 -297
  802. synth_ai/v0/lm/structured_outputs/rehabilitate.py +0 -185
  803. synth_ai/v0/lm/tools/__init__.py +0 -3
  804. synth_ai/v0/lm/tools/base.py +0 -172
  805. synth_ai/v0/lm/unified_interface.py +0 -202
  806. synth_ai/v0/lm/vendors/__init__.py +0 -0
  807. synth_ai/v0/lm/vendors/base.py +0 -81
  808. synth_ai/v0/lm/vendors/core/__init__.py +0 -0
  809. synth_ai/v0/lm/vendors/core/anthropic_api.py +0 -387
  810. synth_ai/v0/lm/vendors/core/gemini_api.py +0 -292
  811. synth_ai/v0/lm/vendors/core/mistral_api.py +0 -322
  812. synth_ai/v0/lm/vendors/core/openai_api.py +0 -227
  813. synth_ai/v0/lm/vendors/core/synth_dev_api.py +0 -0
  814. synth_ai/v0/lm/vendors/local/__init__.py +0 -0
  815. synth_ai/v0/lm/vendors/local/ollama.py +0 -0
  816. synth_ai/v0/lm/vendors/openai_standard.py +0 -782
  817. synth_ai/v0/lm/vendors/openai_standard_responses.py +0 -259
  818. synth_ai/v0/lm/vendors/retries.py +0 -22
  819. synth_ai/v0/lm/vendors/supported/__init__.py +0 -0
  820. synth_ai/v0/lm/vendors/supported/custom_endpoint.py +0 -415
  821. synth_ai/v0/lm/vendors/supported/deepseek.py +0 -69
  822. synth_ai/v0/lm/vendors/supported/grok.py +0 -75
  823. synth_ai/v0/lm/vendors/supported/groq.py +0 -16
  824. synth_ai/v0/lm/vendors/supported/ollama.py +0 -15
  825. synth_ai/v0/lm/vendors/supported/openrouter.py +0 -74
  826. synth_ai/v0/lm/vendors/supported/together.py +0 -11
  827. synth_ai/v0/lm/vendors/synth_client.py +0 -835
  828. synth_ai/v0/lm/warmup.py +0 -186
  829. synth_ai/v0/tracing/__init__.py +0 -0
  830. synth_ai/v0/tracing/abstractions.py +0 -224
  831. synth_ai/v0/tracing/base_client.py +0 -91
  832. synth_ai/v0/tracing/client_manager.py +0 -131
  833. synth_ai/v0/tracing/config.py +0 -142
  834. synth_ai/v0/tracing/context.py +0 -146
  835. synth_ai/v0/tracing/decorators.py +0 -682
  836. synth_ai/v0/tracing/events/__init__.py +0 -0
  837. synth_ai/v0/tracing/events/manage.py +0 -147
  838. synth_ai/v0/tracing/events/scope.py +0 -86
  839. synth_ai/v0/tracing/events/store.py +0 -228
  840. synth_ai/v0/tracing/immediate_client.py +0 -151
  841. synth_ai/v0/tracing/local.py +0 -18
  842. synth_ai/v0/tracing/log_client_base.py +0 -73
  843. synth_ai/v0/tracing/retry_queue.py +0 -186
  844. synth_ai/v0/tracing/trackers.py +0 -515
  845. synth_ai/v0/tracing/upload.py +0 -409
  846. synth_ai/v0/tracing/utils.py +0 -9
  847. synth_ai/v0/tracing_v1/__init__.py +0 -16
  848. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  849. synth_ai/v0/tracing_v1/base_client.py +0 -91
  850. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  851. synth_ai/v0/tracing_v1/config.py +0 -142
  852. synth_ai/v0/tracing_v1/context.py +0 -146
  853. synth_ai/v0/tracing_v1/decorators.py +0 -703
  854. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  855. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  856. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  857. synth_ai/v0/tracing_v1/events/store.py +0 -228
  858. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  859. synth_ai/v0/tracing_v1/local.py +0 -18
  860. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  861. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  862. synth_ai/v0/tracing_v1/trackers.py +0 -515
  863. synth_ai/v0/tracing_v1/upload.py +0 -527
  864. synth_ai/v0/tracing_v1/utils.py +0 -9
  865. synth_ai/v0/tracing_v3/__init__.py +0 -10
  866. synth_ai/v0/tracing_v3/abstractions.py +0 -3
  867. synth_ai/v0/tracing_v3/decorators.py +0 -3
  868. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +0 -3
  869. synth_ai/v0/tracing_v3/session_tracer.py +0 -3
  870. synth_ai-0.2.9.dev11.dist-info/METADATA +0 -191
  871. synth_ai-0.2.9.dev11.dist-info/RECORD +0 -571
  872. synth_ai-0.2.9.dev11.dist-info/entry_points.txt +0 -3
  873. synth_ai-0.2.9.dev11.dist-info/top_level.txt +0 -3
  874. /synth_ai/{demos/demo_task_apps → cli/demo_apps}/crafter/__init__.py +0 -0
  875. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/__init__.py +0 -0
  876. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/crafter_fft_4b.toml +0 -0
  877. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +0 -0
  878. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
  879. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/_common.py +0 -0
  880. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +0 -0
  881. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/config.toml +0 -0
  882. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +0 -0
  883. /synth_ai/{v0/lm/caching → core/apps}/__init__.py +0 -0
  884. /synth_ai/{tracing_v3 → core/tracing_v3}/abstractions.py +0 -0
  885. /synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +0 -0
  886. /synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
  887. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
  888. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
  889. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
  890. /synth_ai/{compound/cais.py → py.typed} +0 -0
  891. /synth_ai/{learning → sdk/learning}/algorithms.py +0 -0
  892. /synth_ai/{learning → sdk/learning}/config.py +0 -0
  893. /synth_ai/{learning → sdk/learning}/constants.py +0 -0
  894. /synth_ai/{learning → sdk/learning}/core.py +0 -0
  895. /synth_ai/{learning → sdk/learning}/gateway.py +0 -0
  896. /synth_ai/{learning → sdk/learning}/rl/__init__.py +0 -0
  897. /synth_ai/{learning → sdk/learning}/rl/config.py +0 -0
  898. /synth_ai/{learning → sdk/learning}/rl_client.py +0 -0
  899. /synth_ai/{learning → sdk/learning}/sft/__init__.py +0 -0
  900. /synth_ai/{learning → sdk/learning}/sse.py +0 -0
  901. /synth_ai/{task → sdk/task}/auth.py +0 -0
  902. /synth_ai/{task → sdk/task}/client.py +0 -0
  903. /synth_ai/{task → sdk/task}/errors.py +0 -0
  904. /synth_ai/{task → sdk/task}/health.py +0 -0
  905. /synth_ai/{task → sdk/task}/json.py +0 -0
  906. /synth_ai/{task → sdk/task}/rubrics.py +0 -0
  907. /synth_ai/{task → sdk/task}/vendors.py +0 -0
  908. {synth_ai-0.2.9.dev11.dist-info → synth_ai-0.4.1.dist-info}/WHEEL +0 -0
  909. {synth_ai-0.2.9.dev11.dist-info → synth_ai-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1984 @@
1
+ """Celery task definitions for running experiment jobs."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ import os
7
+ import re
8
+ import shlex
9
+ import subprocess
10
+ import sys
11
+ import threading
12
+ import time
13
+ from datetime import UTC, datetime
14
+ from pathlib import Path
15
+ from typing import Any
16
+
17
+ from celery.utils.log import get_task_logger
18
+ from dotenv import load_dotenv
19
+
20
+ from .api_schemas import BackendEventsResponse
21
+ from .celery_app import celery_app
22
+ from .config import load_config
23
+ from .config_utils import PreparedConfig, prepare_config_file
24
+ from .database import session_scope
25
+ from .dispatcher import dispatch_available_jobs
26
+ from .models import (
27
+ Experiment,
28
+ ExperimentJob,
29
+ ExperimentJobStatus,
30
+ ExperimentStatus,
31
+ JobExecutionLog,
32
+ )
33
+ from .results import ResultSummary, collect_result_summary
34
+ from .status import ExperimentStatusTracker
35
+ from .status_tracker import extract_config_info, update_status_from_output
36
+ from .trace_storage import persist_trials_from_summary, update_experiment_metadata
37
+
38
+ logger = get_task_logger(__name__)
39
+
40
+
41
+ TRAIN_COMMAND_ENV = "EXPERIMENT_QUEUE_TRAIN_CMD"
42
+
43
+
44
def _load_synth_api_key() -> str:
    """Return SYNTH_API_KEY, loading a ``.env`` file first when one is found.

    Candidate ``.env`` locations, in order:

    1. ``<ancestor>/.env`` where ``<ancestor>`` is three directories above
       the directory that contains this module.
    2. ``.env`` in the current working directory.

    The file is loaded with ``override=False``, so a ``SYNTH_API_KEY`` that is
    already present in the process environment takes precedence over the
    value stored in the file. The key is then read from the environment.

    Returns:
        The API key with surrounding whitespace removed.

    Raises:
        RuntimeError: If no non-blank SYNTH_API_KEY is available after the
            ``.env`` lookup.
    """
    module_path = Path(__file__).resolve()
    try:
        # NOTE(review): this ancestor is meant to be the synth-ai checkout
        # root; confirm the depth matches where this module actually lives.
        env_file = module_path.parents[3] / ".env"
    except IndexError:
        # The module sits too close to the filesystem root to have that
        # ancestor, so only the working-directory candidate remains.
        env_file = Path(".env")

    if not env_file.exists():
        # Try current directory as fallback
        env_file = Path(".env")

    if env_file.exists():
        load_dotenv(env_file, override=False)  # Don't override existing env vars

    # A whitespace-only value is as unusable as a missing one.
    api_key = (os.getenv("SYNTH_API_KEY") or "").strip()

    if not api_key:
        raise RuntimeError(
            f"❌ SYNTH_API_KEY not found! "
            f"Please set it in {env_file.resolve() if env_file.exists() else 'synth-ai/.env'}. "
            f"No fallback - API key must be explicitly set."
        )

    return api_key
76
+
77
+
78
def _find_venv_python() -> str:
    """Find the venv Python executable to avoid uv cache permission issues.

    Checks in order:
    1. ``sys.executable`` when the interpreter is running inside a virtual
       environment (``sys.prefix`` differs from ``sys.base_prefix``) or its
       path contains ``"venv"``.
    2. ``.venv/bin/python`` relative to the current working directory.
    3. ``.venv/bin/python`` relative to the installed ``synth_ai`` package:
       first the directory two levels above the package directory, then the
       directory that directly contains the package.
    4. Falls back to ``"python"`` if no venv interpreter is found.

    Returns:
        Path to an interpreter as a string, or the bare name ``"python"``.
    """
    import logging

    executable = sys.executable
    if executable:
        # The prefix comparison is the documented way to detect a venv; the
        # substring test is kept so previously accepted paths still match.
        inside_venv = sys.prefix != getattr(sys, "base_prefix", sys.prefix)
        if inside_venv or "venv" in executable:
            return executable

    # Check .venv/bin/python relative to current working directory
    cwd_venv = Path.cwd() / ".venv" / "bin" / "python"
    if cwd_venv.exists() and os.access(cwd_venv, os.X_OK):
        return str(cwd_venv)

    # Check .venv/bin/python relative to synth_ai package location
    try:
        import synth_ai

        package_dir = Path(synth_ai.__file__ or Path(__file__).resolve()).parent
        # The first candidate is the location that was always probed; the
        # second is the directory holding the package itself.
        # NOTE(review): the second is presumably the repo root for an
        # editable install; confirm against the intended layout.
        for base_dir in (package_dir.parent.parent, package_dir.parent):
            pkg_venv = base_dir / ".venv" / "bin" / "python"
            if pkg_venv.exists() and os.access(pkg_venv, os.X_OK):
                return str(pkg_venv)
    except Exception:
        # Best-effort lookup: any failure here just means we use the fallback.
        logging.getLogger(__name__).debug(
            "Could not locate a venv next to the synth_ai package", exc_info=True
        )

    # Fallback to system python
    return "python"
109
+
110
+
111
def _get_default_train_cmd() -> str:
    """Get the default training command, evaluating venv path lazily.

    This is called when building the command, not at module import time,
    so it can properly detect the venv based on the current working directory.

    The interpreter path is shell-quoted so that a path containing spaces or
    shell metacharacters stays a single token. Paths made only of safe
    characters are emitted unchanged.
    NOTE(review): assumes the command string is later tokenised with
    shell-style rules (e.g. ``shlex.split``); confirm against the consumer.

    Returns:
        The command string ``"<python> -m synth_ai.cli train"``.
    """
    python_path = shlex.quote(_find_venv_python())
    return f"{python_path} -m synth_ai.cli train"
118
+
119
+
120
+ def _extract_backend_job_id(output: str) -> str | None:
121
+ """Extract backend job ID from subprocess output.
122
+
123
+ Looks for patterns like:
124
+ - JSON: "job_id": "pl_xxxxx"
125
+ - Pattern: pl_[a-f0-9]+
126
+
127
+ Args:
128
+ output: Subprocess stdout/stderr output
129
+
130
+ Returns:
131
+ Backend job ID if found, None otherwise
132
+
133
+ Raises:
134
+ AssertionError: If extracted ID doesn't match expected format
135
+ """
136
+ if not output:
137
+ return None
138
+
139
+ # Assert output is a string
140
+ assert isinstance(output, str), f"Expected str, got {type(output).__name__}"
141
+
142
+ # Look for job_id in JSON response
143
+ match = re.search(r'"job_id"\s*:\s*"([^"]+)"', output)
144
+ if match:
145
+ job_id = match.group(1)
146
+ # Validate format
147
+ assert job_id.startswith("pl_"), f"Extracted job_id doesn't match expected format 'pl_*': {job_id}"
148
+ assert len(job_id) > 3, f"Extracted job_id too short: {job_id}"
149
+ return job_id
150
+
151
+ # Try pattern pl_xxxxx
152
+ match = re.search(r'pl_[a-f0-9]+', output)
153
+ if match:
154
+ job_id = match.group(0)
155
+ # Validate format
156
+ assert job_id.startswith("pl_"), f"Extracted job_id doesn't match expected format 'pl_*': {job_id}"
157
+ assert len(job_id) > 3, f"Extracted job_id too short: {job_id}"
158
+ return job_id
159
+
160
+ return None
161
+
162
+
163
+ def _poll_backend_progress(
164
+ backend_job_id: str,
165
+ status_tracker: ExperimentStatusTracker,
166
+ policy: str | None,
167
+ environment: str | None,
168
+ backend_url: str,
169
+ api_key: str,
170
+ stop_event: threading.Event,
171
+ job_start_time: float | None = None,
172
+ ) -> None:
173
+ """Poll backend API for progress events and update status_json.
174
+
175
+ Polls the backend API endpoint `/prompt-learning/online/jobs/{backend_job_id}/events`
176
+ every 5 seconds to fetch `prompt.learning.progress` events containing rollouts,
177
+ ETA, and best score information. Updates the experiment status_json in real-time.
178
+
179
+ Backend URL Configuration:
180
+ - Default: Production (https://api.usesynth.ai/api)
181
+ - Local: Set EXPERIMENT_QUEUE_LOCAL=true or use --local flag (http://localhost:8000/api)
182
+ - Custom: Set EXPERIMENT_QUEUE_BACKEND_URL env var
183
+
184
+ Args:
185
+ backend_job_id: Backend job ID to poll (e.g., "pl_xxxxx")
186
+ status_tracker: ExperimentStatusTracker instance for updating status_json
187
+ policy: Policy model name (e.g., "gpt-4", "llama-3.1-8b-instant")
188
+ environment: Environment name (e.g., "heartdisease", "hotpotqa")
189
+ backend_url: Backend API base URL (from config.backend_url)
190
+ api_key: API key for authentication (from SYNTH_API_KEY env var)
191
+ stop_event: Threading event to signal when to stop polling
192
+ """
193
+ import logging
194
+ import os
195
+
196
+ import requests
197
+
198
+ # Import BackendJobEvent locally to ensure it's available in this function's scope
199
+ from .api_schemas import BackendJobEvent # noqa: F811
200
+
201
+ # Get logger for this thread (logger from parent thread may not work correctly)
202
+ poller_logger = logging.getLogger(f"synth_ai.cli.local.experiment_queue.poller.{backend_job_id}")
203
+
204
+ # Set log level from environment variable if set (allows --loglevel flag to control verbosity)
205
+ # Use Celery's logger hierarchy instead of creating our own handler to avoid duplicates
206
+ log_level_env = os.getenv("EXPERIMENT_QUEUE_LOG_LEVEL", "INFO").upper()
207
+ try:
208
+ log_level = getattr(logging, log_level_env)
209
+ poller_logger.setLevel(log_level)
210
+ # Don't create handlers - let Celery's logging handle it
211
+ # Just propagate to parent logger (Celery's task logger)
212
+ poller_logger.propagate = True
213
+ except (AttributeError, ValueError):
214
+ # Invalid log level, use default
215
+ pass
216
+
217
+ # Validate inputs with assertions
218
+ assert backend_job_id, "backend_job_id cannot be empty"
219
+ assert backend_job_id.startswith("pl_"), f"Invalid backend_job_id format: expected 'pl_*', got '{backend_job_id}'"
220
+ assert backend_url, "backend_url cannot be empty"
221
+ assert backend_url.startswith(("http://", "https://")), f"Invalid backend_url format: {backend_url}"
222
+ assert api_key, "api_key cannot be empty"
223
+ assert status_tracker is not None, "status_tracker cannot be None"
224
+ assert stop_event is not None, "stop_event cannot be None"
225
+
226
+ url = f"{backend_url.rstrip('/')}/prompt-learning/online/jobs/{backend_job_id}/events"
227
+ headers = {"Authorization": f"Bearer {api_key}"}
228
+ last_seq = 0
229
+ progress_start_time: float | None = None # Track when we first see progress
230
+ consecutive_timeouts = 0 # Track consecutive timeouts for exponential backoff
231
+ base_poll_interval = 5.0 # Base polling interval in seconds
232
+
233
+ # ✅ ADD: Track last progress update time to detect stuck jobs
234
+ last_progress_time: float | None = None
235
+ last_rollouts_completed: int | None = None
236
+ last_progress_seq = 0
237
+ stuck_threshold_seconds = 600.0 # 10 minutes without progress = stuck
238
+
239
+ poller_logger.info("📡 Starting progress poller for backend job %s (URL: %s)", backend_job_id, url)
240
+
241
+ while not stop_event.is_set():
242
+ events_received = 0
243
+ try:
244
+ # Assert URL is valid before making request
245
+ assert url.startswith(("http://", "https://")), f"Invalid URL format: {url}"
246
+
247
+ poller_logger.info("Polling backend API: %s (since_seq: %d)", url, last_seq)
248
+
249
+ try:
250
+ resp = requests.get(
251
+ url,
252
+ headers=headers,
253
+ params={"since_seq": last_seq, "limit": 100},
254
+ timeout=120, # Increased to 120s to handle slow backend/PostgREST responses
255
+ )
256
+ except (requests.exceptions.ConnectionError, requests.exceptions.Timeout) as e:
257
+ # ✅ ADD: Detect connection pool exhaustion in poller
258
+ error_str = str(e).lower()
259
+ is_pool_exhausted = (
260
+ "connection" in error_str
261
+ or "timeout" in error_str
262
+ or "refused" in error_str
263
+ )
264
+ if is_pool_exhausted:
265
+ # 🔥 VERY LOUD ERROR MESSAGES FOR CONNECTION POOL ISSUES IN POLLER
266
+ print("=" * 100, flush=True)
267
+ print("🔥🔥🔥 CONNECTION POOL EXHAUSTION DETECTED (POLLER) 🔥🔥🔥", flush=True)
268
+ print("=" * 100, flush=True)
269
+ print(f"Backend Job ID: {backend_job_id}", flush=True)
270
+ print(f"URL: {url}", flush=True)
271
+ print(f"Error: {type(e).__name__}: {str(e)}", flush=True)
272
+ print("=" * 100, flush=True)
273
+ print("⚠️ Cannot fetch events - connection pool may be exhausted!", flush=True)
274
+ print("⚠️ Check DB_POOL_SIZE and DB_MAX_OVERFLOW environment variables", flush=True)
275
+ print("=" * 100, flush=True)
276
+
277
+ poller_logger.error("=" * 100)
278
+ poller_logger.error("🔥🔥🔥 CONNECTION POOL EXHAUSTION DETECTED (POLLER) 🔥🔥🔥")
279
+ poller_logger.error("=" * 100)
280
+ poller_logger.error("Backend Job ID: %s | URL: %s", backend_job_id, url)
281
+ poller_logger.error("Error: %s: %s", type(e).__name__, str(e))
282
+ poller_logger.error("⚠️ Cannot fetch events - connection pool may be exhausted!")
283
+ poller_logger.error("⚠️ Check DB_POOL_SIZE and DB_MAX_OVERFLOW environment variables")
284
+ poller_logger.error("=" * 100)
285
+ raise
286
+
287
+ # Assert we got a response object
288
+ assert resp is not None, "requests.get() returned None"
289
+
290
+ poller_logger.info("API response: status=%d, content_length=%d", resp.status_code, len(resp.content))
291
+
292
+ # ✅ ADD: Detect connection pool exhaustion in HTTP error responses
293
+ if resp.status_code not in (200, 201):
294
+ body_text = (resp.text or "")[:500].lower()
295
+ is_pool_exhausted = (
296
+ resp.status_code == 503 # Service Unavailable
297
+ or resp.status_code == 429 # Too Many Requests (after long wait)
298
+ or "connection pool" in body_text
299
+ or "too many clients" in body_text
300
+ or "maxclients" in body_text
301
+ or "max clients" in body_text
302
+ or "connection refused" in body_text
303
+ )
304
+
305
+ if is_pool_exhausted:
306
+ # 🔥 VERY LOUD ERROR MESSAGES FOR CONNECTION POOL ISSUES IN POLLER
307
+ print("=" * 100, flush=True)
308
+ print("🔥🔥🔥 CONNECTION POOL EXHAUSTION DETECTED (POLLER HTTP ERROR) 🔥🔥🔥", flush=True)
309
+ print("=" * 100, flush=True)
310
+ print(f"Backend Job ID: {backend_job_id}", flush=True)
311
+ print(f"URL: {url}", flush=True)
312
+ print(f"HTTP Status: {resp.status_code}", flush=True)
313
+ print(f"Response Body: {resp.text[:500]}", flush=True)
314
+ print("=" * 100, flush=True)
315
+ print("⚠️ Cannot fetch events - connection pool may be exhausted!", flush=True)
316
+ print("⚠️ Check DB_POOL_SIZE and DB_MAX_OVERFLOW environment variables", flush=True)
317
+ print("=" * 100, flush=True)
318
+
319
+ poller_logger.error("=" * 100)
320
+ poller_logger.error("🔥🔥🔥 CONNECTION POOL EXHAUSTION DETECTED (POLLER HTTP ERROR) 🔥🔥🔥")
321
+ poller_logger.error("=" * 100)
322
+ poller_logger.error("Backend Job ID: %s | URL: %s | HTTP: %d", backend_job_id, url, resp.status_code)
323
+ poller_logger.error("Response Body: %s", resp.text[:500])
324
+ poller_logger.error("⚠️ Cannot fetch events - connection pool may be exhausted!")
325
+ poller_logger.error("⚠️ Check DB_POOL_SIZE and DB_MAX_OVERFLOW environment variables")
326
+ poller_logger.error("=" * 100)
327
+
328
+ if resp.status_code == 200:
329
+ # Parse and validate API response using Pydantic models
330
+ try:
331
+ raw_data = resp.json()
332
+ # Assert response is not None
333
+ assert raw_data is not None, "API returned None response"
334
+
335
+ # Parse response with validation
336
+ assert isinstance(raw_data, dict | list), (
337
+ f"API response must be dict or list, got {type(raw_data).__name__}: {raw_data}"
338
+ )
339
+
340
+ events_response = BackendEventsResponse.parse_response(raw_data)
341
+ assert isinstance(events_response, BackendEventsResponse), (
342
+ f"parse_response returned wrong type: {type(events_response).__name__}"
343
+ )
344
+ assert isinstance(events_response.events, list), (
345
+ f"events_response.events must be list, got {type(events_response.events).__name__}"
346
+ )
347
+
348
+ events_received = len(events_response.events)
349
+ assert events_received >= 0, (
350
+ f"events_received must be >= 0, got {events_received}"
351
+ )
352
+
353
+ # Process each event
354
+ event_types_seen: dict[str, int] = {}
355
+ for idx, event in enumerate(events_response.events):
356
+ # Assert event is BackendJobEvent instance
357
+ assert isinstance(event, BackendJobEvent), (
358
+ f"Event at index {idx} must be BackendJobEvent, got {type(event).__name__}"
359
+ )
360
+ # Assert event has required fields
361
+ assert event.seq >= 0, f"Invalid seq: {event.seq}"
362
+ assert event.type, f"Event missing type field: {event}"
363
+ assert event.message, f"Event missing message field: {event}"
364
+
365
+ # Track event types for debugging
366
+ event_types_seen[event.type] = event_types_seen.get(event.type, 0) + 1
367
+
368
+ # Check if this is a progress event
369
+ if event.type == "prompt.learning.progress":
370
+ poller_logger.info(
371
+ "Found progress event seq=%d: %s",
372
+ event.seq,
373
+ event.message[:100],
374
+ )
375
+ # Extract progress data with validation
376
+ progress_data = event.get_progress_data()
377
+ if progress_data is None:
378
+ poller_logger.warning(
379
+ "Progress event seq=%d has no parseable data. Event data: %s",
380
+ event.seq,
381
+ event.data,
382
+ )
383
+ continue
384
+
385
+ poller_logger.debug(
386
+ "Progress event seq=%d data: rollouts_completed=%s, rollouts_total=%s, best_score=%s, eta=%s",
387
+ event.seq,
388
+ progress_data.rollouts_completed,
389
+ progress_data.effective_rollouts_total,
390
+ progress_data.effective_best_score,
391
+ progress_data.eta_seconds,
392
+ )
393
+
394
+ # Use effective getters that handle field name variations
395
+ rollouts_completed = progress_data.rollouts_completed
396
+ rollouts_total = progress_data.effective_rollouts_total
397
+ eta_seconds = progress_data.eta_seconds
398
+ # percent_rollouts from backend is 0-1, convert to 0-100 for display
399
+ progress_pct = None
400
+ if progress_data.percent_rollouts is not None:
401
+ progress_pct = progress_data.percent_rollouts * 100.0
402
+ elif progress_data.percent_overall is not None:
403
+ # Fallback to percent_overall if percent_rollouts not available
404
+ progress_pct = progress_data.percent_overall * 100.0
405
+ best_score = progress_data.effective_best_score
406
+
407
+ # Track when we first see progress (for rollouts/min calculation)
408
+ if rollouts_completed is not None and rollouts_completed > 0 and progress_start_time is None:
409
+ progress_start_time = time.time()
410
+
411
+ # Calculate rollouts/min if we have progress and timing info
412
+ rollouts_per_minute = None
413
+ if rollouts_completed is not None and rollouts_completed > 0:
414
+ # Use progress_start_time if available, otherwise fall back to job_start_time
415
+ start_time_for_rate = progress_start_time or job_start_time
416
+ if start_time_for_rate is not None:
417
+ elapsed = time.time() - start_time_for_rate
418
+ if elapsed > 0:
419
+ rate_per_second = rollouts_completed / elapsed
420
+ rollouts_per_minute = rate_per_second * 60.0
421
+
422
+ # Assert data types and ranges
423
+ if rollouts_completed is not None:
424
+ assert isinstance(rollouts_completed, int), (
425
+ f"rollouts_completed must be int, got {type(rollouts_completed).__name__}: {rollouts_completed}"
426
+ )
427
+ assert rollouts_completed >= 0, (
428
+ f"rollouts_completed must be >= 0, got {rollouts_completed}"
429
+ )
430
+
431
+ if rollouts_total is not None:
432
+ assert isinstance(rollouts_total, int), (
433
+ f"rollouts_total must be int, got {type(rollouts_total).__name__}: {rollouts_total}"
434
+ )
435
+ assert rollouts_total > 0, (
436
+ f"rollouts_total must be > 0, got {rollouts_total}"
437
+ )
438
+
439
+ if eta_seconds is not None:
440
+ assert isinstance(eta_seconds, int | float), (
441
+ f"eta_seconds must be int | float, got {type(eta_seconds).__name__}: {eta_seconds}"
442
+ )
443
+ assert eta_seconds >= 0, (
444
+ f"eta_seconds must be >= 0, got {eta_seconds}"
445
+ )
446
+
447
+ if best_score is not None:
448
+ assert isinstance(best_score, int | float), (
449
+ f"best_score must be int | float, got {type(best_score).__name__}: {best_score}"
450
+ )
451
+ assert 0 <= best_score <= 1, (
452
+ f"best_score must be in [0, 1], got {best_score}"
453
+ )
454
+
455
+ if progress_pct is not None:
456
+ assert isinstance(progress_pct, int | float), (
457
+ f"progress_pct must be int | float, got {type(progress_pct).__name__}: {progress_pct}"
458
+ )
459
+ assert 0 <= progress_pct <= 100, (
460
+ f"progress_pct must be in [0, 100], got {progress_pct}"
461
+ )
462
+
463
+ # Assert consistency: rollouts_completed <= rollouts_total
464
+ if rollouts_completed is not None and rollouts_total is not None:
465
+ assert rollouts_completed <= rollouts_total, (
466
+ f"rollouts_completed ({rollouts_completed}) > rollouts_total ({rollouts_total})"
467
+ )
468
+
469
+ # Assert we have meaningful progress data
470
+ has_progress = (
471
+ rollouts_completed is not None
472
+ or best_score is not None
473
+ or rollouts_total is not None
474
+ )
475
+
476
+ # ✅ Initialize custom_fields before use (extract from event data for validation phase tracking)
477
+ custom_fields: dict[str, Any] = {}
478
+ if event.data and isinstance(event.data, dict):
479
+ # Extract phase and validation info if present
480
+ phase = event.data.get("phase")
481
+ if phase == "validation":
482
+ custom_fields["phase"] = "validation"
483
+ if "validation_candidate" in event.data:
484
+ custom_fields["validation_candidate"] = event.data["validation_candidate"]
485
+ if "validation_total" in event.data:
486
+ custom_fields["validation_total"] = event.data["validation_total"]
487
+
488
+ if has_progress:
489
+ # Validate status_tracker before update
490
+ assert status_tracker is not None, "status_tracker is None"
491
+ assert hasattr(status_tracker, "update"), "status_tracker missing update method"
492
+ assert hasattr(status_tracker, "job_id"), "status_tracker missing job_id"
493
+
494
+ status_tracker.update(
495
+ policy=policy,
496
+ environment=environment,
497
+ rollouts_completed=rollouts_completed,
498
+ total_rollouts=rollouts_total,
499
+ eta_seconds=eta_seconds,
500
+ progress_pct=progress_pct,
501
+ best_score=best_score,
502
+ rollouts_per_minute=rollouts_per_minute,
503
+ custom_fields=custom_fields if custom_fields else None,
504
+ )
505
+
506
+ # ✅ ADD: Track progress for stuck detection
507
+ import time as _time_module
508
+ current_time = _time_module.time()
509
+ if rollouts_completed is not None:
510
+ if last_rollouts_completed is None or rollouts_completed != last_rollouts_completed:
511
+ # Progress changed - update tracking
512
+ last_progress_time = current_time
513
+ last_rollouts_completed = rollouts_completed
514
+ last_progress_seq = event.seq
515
+ poller_logger.info(
516
+ "📊 Progress update for job %s: %s/%s rollouts, ETA: %s, Best: %s",
517
+ backend_job_id,
518
+ rollouts_completed,
519
+ rollouts_total,
520
+ eta_seconds,
521
+ best_score,
522
+ )
523
+ elif last_progress_time is not None:
524
+ # Check if stuck (no progress for threshold time)
525
+ time_since_progress = current_time - last_progress_time
526
+ if time_since_progress >= stuck_threshold_seconds:
527
+ poller_logger.warning(
528
+ "⚠️ Job %s appears STUCK: No progress for %.1f minutes (last: %s/%s rollouts at seq %d)",
529
+ backend_job_id,
530
+ time_since_progress / 60.0,
531
+ last_rollouts_completed,
532
+ rollouts_total,
533
+ last_progress_seq,
534
+ )
535
+ # Emit warning event
536
+ with contextlib.suppress(Exception):
537
+ status_tracker.update(
538
+ custom_fields={
539
+ **(custom_fields or {}),
540
+ "stuck_warning": True,
541
+ "time_since_progress_seconds": time_since_progress,
542
+ }
543
+ )
544
+ else:
545
+ # No rollouts info - log anyway
546
+ poller_logger.info(
547
+ "📊 Progress update for job %s: %s/%s rollouts, ETA: %s, Best: %s",
548
+ backend_job_id,
549
+ rollouts_completed,
550
+ rollouts_total,
551
+ eta_seconds,
552
+ best_score,
553
+ )
554
+
555
+ # Update last_seq (always update, even if no progress data)
556
+ last_seq = max(last_seq, event.seq)
557
+ else:
558
+ # Non-progress event - just update seq
559
+ last_seq = max(last_seq, event.seq)
560
+
561
+ # ✅ ADD: Track consecutive polls with no new events
562
+ if events_received == 0:
563
+ # Increment counter for no-event polls
564
+ if not hasattr(_poll_backend_progress, '_no_event_polls'):
565
+ _poll_backend_progress._no_event_polls = {} # type: ignore[attr-defined]
566
+ if backend_job_id not in _poll_backend_progress._no_event_polls: # type: ignore[attr-defined]
567
+ _poll_backend_progress._no_event_polls[backend_job_id] = 0 # type: ignore[attr-defined]
568
+ _poll_backend_progress._no_event_polls[backend_job_id] += 1 # type: ignore[attr-defined]
569
+ no_event_count = _poll_backend_progress._no_event_polls[backend_job_id] # type: ignore[attr-defined]
570
+
571
+ # Warn if we've had many consecutive polls with no events
572
+ if no_event_count >= 12: # 12 polls * 5s = 60s with no events
573
+ poller_logger.warning(
574
+ "⚠️ Job %s: No new events for %d consecutive polls (~%ds). Last seq: %d. Job may be stuck.",
575
+ backend_job_id,
576
+ no_event_count,
577
+ no_event_count * int(base_poll_interval),
578
+ last_seq,
579
+ )
580
+ # Emit warning in status_json
581
+ with contextlib.suppress(Exception):
582
+ status_tracker.update(
583
+ custom_fields={
584
+ "no_event_polls": no_event_count,
585
+ "last_event_seq": last_seq,
586
+ "stuck_warning": True,
587
+ }
588
+ )
589
+
590
+ poller_logger.info("Progress poller heartbeat for job %s (no new events, last_seq=%d, consecutive_no_events=%d)", backend_job_id, last_seq, no_event_count)
591
+ else:
592
+ # Reset counter when we get events
593
+ if hasattr(_poll_backend_progress, '_no_event_polls') and backend_job_id in _poll_backend_progress._no_event_polls: # type: ignore[attr-defined]
594
+ _poll_backend_progress._no_event_polls[backend_job_id] = 0 # type: ignore[attr-defined]
595
+
596
+ event_types_str = ", ".join(f"{k}:{v}" for k, v in sorted(event_types_seen.items()))
597
+ poller_logger.info(
598
+ "Processed %d events (types: %s), updated last_seq to %d",
599
+ events_received,
600
+ event_types_str,
601
+ last_seq,
602
+ )
603
+ # Log if we're not seeing progress events
604
+ if "prompt.learning.progress" not in event_types_seen:
605
+ poller_logger.debug(
606
+ "No progress events in this batch (last_seq=%d). Event types seen: %s",
607
+ last_seq,
608
+ event_types_str,
609
+ )
610
+
611
+ # Reset timeout counter on successful request
612
+ consecutive_timeouts = 0
613
+
614
+ except AssertionError as e:
615
+ poller_logger.error(
616
+ "❌ Assertion failed while parsing events for job %s: %s. Response: %s",
617
+ backend_job_id,
618
+ e,
619
+ resp.text[:500] if resp else "No response",
620
+ )
621
+ # Continue polling - don't stop on validation errors
622
+ except ValueError as e:
623
+ poller_logger.error(
624
+ "❌ Invalid API response format for job %s: %s. Response: %s",
625
+ backend_job_id,
626
+ e,
627
+ resp.text[:500] if resp else "No response",
628
+ )
629
+ # Continue polling - don't stop on validation errors
630
+ except Exception as e:
631
+ poller_logger.error(
632
+ "❌ Unexpected error parsing events for job %s: %s. Response: %s",
633
+ backend_job_id,
634
+ e,
635
+ resp.text[:500] if resp else "No response",
636
+ exc_info=True,
637
+ )
638
+ # Continue polling - don't stop on parsing errors
639
+ elif resp.status_code == 404:
640
+ # Job not found yet or doesn't exist - stop polling
641
+ poller_logger.warning("Backend job %s not found (404), stopping poller", backend_job_id)
642
+ break
643
+ elif resp.status_code != 200:
644
+ poller_logger.warning(
645
+ "Backend API returned status %d for job %s: %s",
646
+ resp.status_code,
647
+ backend_job_id,
648
+ resp.text[:200],
649
+ )
650
+ except requests.exceptions.ReadTimeout as e:
651
+ # ReadTimeout is expected when backend is slow - log as warning and use exponential backoff
652
+ consecutive_timeouts += 1
653
+ backoff_seconds = min(base_poll_interval * (2 ** min(consecutive_timeouts - 1, 4)), 60.0) # Max 60s backoff
654
+ poller_logger.warning(
655
+ "Backend timeout polling job %s (consecutive=%d, backing off %.1fs): %s",
656
+ backend_job_id,
657
+ consecutive_timeouts,
658
+ backoff_seconds,
659
+ e,
660
+ )
661
+ # Use exponential backoff on timeout
662
+ stop_event.wait(timeout=backoff_seconds)
663
+ continue
664
+ except requests.exceptions.RequestException as e:
665
+ # Other network errors - log as warning, reset timeout counter
666
+ consecutive_timeouts = 0
667
+ poller_logger.warning("Network error polling job %s: %s", backend_job_id, e)
668
+ except Exception as e:
669
+ # Unexpected errors - log as error but don't crash
670
+ consecutive_timeouts = 0
671
+ poller_logger.error("Progress poller error for job %s: %s", backend_job_id, e, exc_info=True)
672
+
673
+ # Poll every 5 seconds (or after backoff)
674
+ stop_event.wait(timeout=base_poll_interval)
675
+
676
+ poller_logger.info("📡 Stopped progress poller for backend job %s", backend_job_id)
677
+
678
+
679
+ def _truncate(text: str, limit: int = 4000) -> str:
680
+ """Truncate text to a maximum length, keeping the end portion.
681
+
682
+ Args:
683
+ text: Text to truncate
684
+ limit: Maximum length in characters (default: 4000)
685
+
686
+ Returns:
687
+ Truncated text (last `limit` characters if text exceeds limit)
688
+ """
689
+ if len(text) <= limit:
690
+ return text
691
+ return text[-limit:]
692
+
693
+
694
def _build_train_command(config_path: str) -> list[str]:
    """Assemble the argv list used to launch a prompt learning training run.

    The base command is read from the environment variable named by
    TRAIN_COMMAND_ENV; when that is unset or empty, the value returned by
    `_get_default_train_cmd()` is used instead. The base command is split with
    shell-style quoting rules, empty tokens are dropped, and the prompt
    learning flags are appended.

    Args:
        config_path: Path to the TOML config file for the experiment

    Returns:
        Command segments in order: the base command tokens followed by
        --type prompt_learning, --config <config_path>, --backend <url>,
        --poll, and --stream-format cli. The backend URL is taken from
        `load_config().backend_url`.
    """
    env_override = os.getenv(TRAIN_COMMAND_ENV)
    if env_override:
        base_cmd = env_override
        logger.debug("Using training command from EXPERIMENT_QUEUE_TRAIN_CMD: %s", base_cmd)
    else:
        # Default is resolved lazily, only when no override is present.
        base_cmd = _get_default_train_cmd()
        logger.debug("Using default training command: %s", base_cmd)

    # shlex.split can yield empty tokens (e.g. from '' in the command); skip them.
    base_tokens = [token for token in shlex.split(base_cmd) if token]

    backend_url = load_config().backend_url

    return [
        *base_tokens,
        "--type",
        "prompt_learning",
        "--config",
        config_path,
        "--backend",
        backend_url,
        "--poll",
        "--stream-format",
        "cli",
    ]
745
+
746
+
747
def _mark_job_running(job_id: str, task_id: str | None) -> ExperimentJob | None:
    """Flip a job to RUNNING and return a detached copy of it.

    Sets the job's status and started_at timestamp and, when a task ID is
    given, records it as the job's celery_task_id. A parent experiment that is
    still QUEUED is moved to RUNNING with its own started_at timestamp.

    Args:
        job_id: Job identifier
        task_id: Optional Celery task ID to associate with the job

    Returns:
        The ExperimentJob if it exists, otherwise None (a warning is logged).

    Note:
        Changes are flushed and the job is expunged from the session before
        being returned, so the caller can use it outside the session scope.
    """
    with session_scope() as db:
        record = db.get(ExperimentJob, job_id)
        if record:
            record.status = ExperimentJobStatus.RUNNING
            record.started_at = datetime.now(UTC)
            if task_id:
                record.celery_task_id = task_id

            parent = record.experiment
            if parent and parent.status == ExperimentStatus.QUEUED:
                # First job to start also starts the experiment.
                parent.status = ExperimentStatus.RUNNING
                parent.started_at = datetime.now(UTC)

            db.flush()
            # Detach so the instance stays usable after the session closes.
            db.expunge(record)
            return record

        logger.warning("Job %s missing from database", job_id)
        return None
782
+
783
+
784
def _jobs_remaining(session, experiment_id: str) -> int:
    """Return how many jobs of an experiment are still QUEUED or RUNNING.

    Args:
        session: SQLAlchemy session
        experiment_id: Experiment identifier

    Returns:
        Count of ExperimentJob rows for the experiment whose status is
        QUEUED or RUNNING.
    """
    unfinished_states = [
        ExperimentJobStatus.QUEUED,
        ExperimentJobStatus.RUNNING,
    ]
    unfinished_jobs = session.query(ExperimentJob).filter(
        ExperimentJob.experiment_id == experiment_id,
        ExperimentJob.status.in_(unfinished_states),
    )
    return unfinished_jobs.count()
807
+
808
+
809
def _clip_middle(text: str, limit: int = 100000) -> str:
    """Keep the first and last `limit // 2` chars of `text`, eliding the middle if it exceeds `limit`."""
    if len(text) <= limit:
        return text
    half = limit // 2
    return f"{text[:half]}\n\n... (truncated {len(text) - limit} chars) ...\n\n{text[-half:]}"


def _finalize_job(
    job_id: str,
    *,
    summary: ResultSummary,
    success: bool,
    error_message: str | None = None,
    command: str | None = None,
    working_directory: str | None = None,
    python_executable: str | None = None,
    environment_keys: list[str] | None = None,
) -> dict[str, Any] | None:
    """Finalize a job by updating its status and persisting results.

    Updates the job status to COMPLETED or FAILED based on the success flag,
    persists trial data if successful, and updates the experiment status once
    no jobs are QUEUED or RUNNING. If jobs remain, dispatches them.

    Args:
        job_id: Job identifier
        summary: Result summary containing stdout, stderr, returncode, etc.
        success: Whether the job completed successfully
        error_message: Optional error message if job failed; falls back to
            `summary.stderr`, then to "Job failed"
        command: Command that was executed. An execution log row is written
            only when both `command` and `working_directory` are given.
        working_directory: Directory the command was executed in
        python_executable: Stored on the execution log row as-is
        environment_keys: Stored on the execution log row as-is

    Returns:
        `summary.to_dict()` if the job was found, None otherwise

    Note:
        - If successful: job status set to COMPLETED, trials persisted, and,
          when the job has a backend_job_id, final stats are fetched from the
          backend and written via `update_job_status` (best effort; failures
          are logged as warnings and never fail finalization)
        - If failed: job status set to FAILED, `job.result` cleared, error
          message stored (middle-truncated above 100k chars)
        - Experiment is marked FAILED only when every job failed, otherwise
          COMPLETED, and only once no jobs remain
    """
    with session_scope() as session:
        job = session.get(ExperimentJob, job_id)
        if not job:
            logger.warning("Job %s missing during finalize", job_id)
            return None

        job.completed_at = datetime.now(UTC)
        experiment = job.experiment

        # Always record an execution log (success and failure alike) so that
        # failures can be queried directly from the database.
        if command is not None and working_directory is not None:
            from uuid import uuid4

            stdout_for_log = summary.stdout or ""
            stderr_for_log = summary.stderr or ""
            if not success:
                # Keep (almost) everything for failures; only clip extreme output.
                stdout_for_log = _clip_middle(stdout_for_log)
                stderr_for_log = _clip_middle(stderr_for_log)
            else:
                # Successful runs only keep the tail to save space.
                stdout_for_log = _truncate(stdout_for_log)
                stderr_for_log = _truncate(stderr_for_log)

            execution_log = JobExecutionLog(
                log_id=f"log_{uuid4().hex[:12]}",
                job_id=job_id,
                command=command,
                working_directory=working_directory,
                returncode=summary.returncode,
                stdout=stdout_for_log,
                stderr=stderr_for_log,
                python_executable=python_executable,
                environment_keys=environment_keys,
            )
            session.add(execution_log)
            logger.info(
                "Created execution log for job %s: returncode=%d, stdout_len=%d (stored: %d), stderr_len=%d (stored: %d)%s",
                job_id,
                summary.returncode,
                len(summary.stdout or ""),
                len(stdout_for_log),
                len(summary.stderr or ""),
                len(stderr_for_log),
                " [FULL ERROR STORED]" if not success else "",
            )

        if success:
            # Only set job.result for successful jobs to prevent stale data from previous runs
            job.result = summary.to_dict()
            job.status = ExperimentJobStatus.COMPLETED
            persist_trials_from_summary(session, job, summary)
            if experiment:
                update_experiment_metadata(experiment, summary)

            # Best effort: copy final stats from the backend job into status_json.
            # NOTE(review): this HTTP call (up to 60s) runs while the DB session
            # is open — confirm that is acceptable for the session backend.
            if job.backend_job_id:
                try:
                    import requests

                    from .service import update_job_status

                    config = load_config()
                    backend_url = config.backend_url
                    # Missing API key is logged loudly, then handled by the
                    # outer handler like any other stats-update failure.
                    try:
                        api_key = _load_synth_api_key()
                    except RuntimeError as e:
                        logger.error(str(e))
                        raise

                    if backend_url and api_key:
                        url = f"{backend_url.rstrip('/')}/prompt-learning/online/jobs/{job.backend_job_id}"
                        headers = {"Authorization": f"Bearer {api_key}"}
                        resp = requests.get(url, headers=headers, timeout=60.0)

                        if resp.status_code == 200:
                            backend_job = resp.json()
                            # `or {}` also covers an explicit JSON null.
                            backend_metadata = backend_job.get("metadata") or {}
                            backend_stats = backend_metadata.get("stats") or {}

                            if backend_stats:
                                # A best score of 0 is a valid value, so fall
                                # back to the validation score only on None.
                                best_score = backend_stats.get("best_score")
                                if best_score is None:
                                    best_score = backend_stats.get("best_validation_score")

                                status_update = {
                                    "trials_tried": backend_stats.get("trials_tried"),
                                    "total_tokens": backend_stats.get("total_tokens"),
                                    "total_rollouts": backend_stats.get("total_rollouts"),
                                    "optimization_rollouts_executed": backend_stats.get("optimization_rollouts_executed"),
                                    "validation_rollouts_executed": backend_stats.get("validation_rollouts_executed"),
                                    "optimization_trials_evaluated": backend_stats.get("optimization_trials_evaluated"),
                                    "validation_trials_evaluated": backend_stats.get("validation_trials_evaluated"),
                                    # Scores are stored so results can still be extracted
                                    # if the backend job returns 404 later.
                                    "baseline_score": backend_stats.get("baseline_score"),
                                    "best_score": best_score,
                                    "total_time_seconds": backend_stats.get("total_time_seconds"),
                                    "eval_seeds_n": backend_stats.get("eval_seeds_n"),
                                    "transformations_evaluated": backend_stats.get("transformations_evaluated"),
                                }
                                # Remove None values
                                status_update = {k: v for k, v in status_update.items() if v is not None}
                                # Explicit raise instead of `assert` so the check
                                # survives `python -O`; caught and logged below.
                                if not status_update:
                                    raise ValueError(f"status_update must not be empty for job {job_id}")
                                update_job_status(job_id, status_update)
                                logger.info(
                                    "Updated status_json with final stats for job %s: %s",
                                    job_id,
                                    status_update,
                                )
                except Exception as e:
                    # Log but don't fail job finalization if stats update fails
                    logger.warning(
                        "Failed to update status_json with final stats for job %s: %s",
                        job_id,
                        e,
                    )
        else:
            # Job failed - clear job.result to prevent stale data from previous successful runs
            job.result = None
            job.status = ExperimentJobStatus.FAILED
            # Store the error with full context, clipped in the middle above 100k chars.
            job.error = _clip_middle(error_message or summary.stderr or "Job failed")
            if experiment:
                # The experiment is not failed here; it is only marked FAILED
                # below if every one of its jobs failed.
                logger.warning(
                    "Job %s failed for experiment %s, but allowing remaining jobs to continue",
                    job_id,
                    experiment.experiment_id,
                )

        session.flush()

        if experiment:
            remaining = _jobs_remaining(session, experiment.experiment_id)
            if remaining == 0:
                # Nothing queued or running any more: settle the experiment status.
                all_jobs = (
                    session.query(ExperimentJob)
                    .filter(ExperimentJob.experiment_id == experiment.experiment_id)
                    .all()
                )
                all_failed = all(
                    sibling.status == ExperimentJobStatus.FAILED for sibling in all_jobs
                )
                if all_failed:
                    experiment.status = ExperimentStatus.FAILED
                    experiment.error = (
                        all_jobs[0].error if all_jobs else "All jobs failed"
                    )
                else:
                    experiment.status = ExperimentStatus.COMPLETED
                # NOTE(review): completed_at is stamped for both terminal
                # states here — confirm against the intended nesting.
                experiment.completed_at = datetime.now(UTC)
            else:
                # Dispatch remaining jobs (periodic task will also handle this as backup)
                dispatch_available_jobs(session, experiment.experiment_id)

        return summary.to_dict()
1008
+
1009
+
1010
+ @celery_app.task(bind=True, name="synth_ai.cli.local.experiment_queue.run_experiment_job")
1011
+ def run_experiment_job(self, job_id: str) -> dict[str, Any] | None:
1012
+ """Celery task entrypoint for running a prompt learning experiment job.
1013
+
1014
+ This is the main Celery task that executes prompt learning jobs. It:
1015
+ 1. Marks the job as RUNNING
1016
+ 2. Prepares the config file (applies overrides)
1017
+ 3. Builds and executes the training command via subprocess
1018
+ 4. Collects results (stdout, stderr, metrics, artifacts)
1019
+ 5. Finalizes the job (updates status, persists results)
1020
+
1021
+ Args:
1022
+ self: Celery task instance (bound task)
1023
+ job_id: Job identifier from the experiment queue database
1024
+
1025
+ Returns:
1026
+ Result summary dictionary if successful, None if job not found
1027
+
1028
+ Raises:
1029
+ AssertionError: If inputs are invalid (should not happen in production)
1030
+
1031
+ Note:
1032
+ The task runs the training command (`synth-ai train --type prompt_learning`)
1033
+ as a subprocess and captures stdout/stderr. Health check failures and
1034
+ authentication errors are detected and cause job failure even if returncode is 0.
1035
+ """
1036
+ # Validate input
1037
+ assert isinstance(job_id, str), (
1038
+ f"job_id must be str, got {type(job_id).__name__}: {job_id}"
1039
+ )
1040
+ assert job_id, "job_id cannot be empty"
1041
+
1042
+ job = _mark_job_running(job_id, getattr(self.request, "id", None))
1043
+ if not job:
1044
+ logger.warning("Job %s not found or could not be marked as running", job_id)
1045
+ return None
1046
+
1047
+ # Validate job object
1048
+ assert isinstance(job, ExperimentJob), (
1049
+ f"_mark_job_running must return ExperimentJob, got {type(job).__name__}"
1050
+ )
1051
+ assert job.job_id == job_id, (
1052
+ f"Job ID mismatch: expected {job_id}, got {job.job_id}"
1053
+ )
1054
+ assert job.status == ExperimentJobStatus.RUNNING, (
1055
+ f"Job status must be RUNNING, got {job.status}"
1056
+ )
1057
+
1058
+ summary = ResultSummary()
1059
+ prepared: PreparedConfig | None = None
1060
+ success = False
1061
+ error_message: str | None = None # Will be set if training fails
1062
+ cmd: list[str] | None = None # Store command for execution logging
1063
+ env: dict[str, str] | None = None # Store environment for execution logging
1064
+
1065
+ # Initialize status tracker
1066
+ assert job.job_id, "job.job_id cannot be empty"
1067
+ status_tracker = ExperimentStatusTracker(job.job_id)
1068
+ assert status_tracker.job_id == job.job_id, (
1069
+ f"Status tracker job_id mismatch: expected {job.job_id}, got {status_tracker.job_id}"
1070
+ )
1071
+
1072
+ job_start_time = time.time()
1073
+ assert job_start_time > 0, f"job_start_time must be > 0, got {job_start_time}"
1074
+
1075
+ policy: str | None = None
1076
+ environment: str | None = None
1077
+
1078
+ try:
1079
+ # Validate config_path
1080
+ assert job.config_path, "job.config_path cannot be empty"
1081
+ assert isinstance(job.config_path, str), (
1082
+ f"job.config_path must be str, got {type(job.config_path).__name__}"
1083
+ )
1084
+
1085
+ # Validate config_overrides
1086
+ if job.config_overrides is not None:
1087
+ assert isinstance(job.config_overrides, dict), (
1088
+ f"job.config_overrides must be dict, got {type(job.config_overrides).__name__}"
1089
+ )
1090
+
1091
+ prepared = prepare_config_file(job.config_path, job.config_overrides or {})
1092
+ assert prepared is not None, "prepare_config_file returned None"
1093
+ assert isinstance(prepared, PreparedConfig), (
1094
+ f"prepare_config_file must return PreparedConfig, got {type(prepared).__name__}"
1095
+ )
1096
+ assert prepared.path.exists(), (
1097
+ f"Prepared config file must exist: {prepared.path}"
1098
+ )
1099
+
1100
+ # Extract policy and environment from config
1101
+ policy, environment = extract_config_info(prepared.path)
1102
+ assert isinstance(policy, str | type(None)), (
1103
+ f"policy must be str | None, got {type(policy).__name__}: {policy}"
1104
+ )
1105
+ assert isinstance(environment, str | type(None)), (
1106
+ f"environment must be str | None, got {type(environment).__name__}: {environment}"
1107
+ )
1108
+
1109
+ # Extract model/provider from override FIRST (override takes precedence)
1110
+ model_override = None
1111
+ provider_override = None
1112
+ if job.config_overrides:
1113
+ model_override = job.config_overrides.get("prompt_learning.policy.model")
1114
+ provider_override = job.config_overrides.get("prompt_learning.policy.provider")
1115
+
1116
+ # Use override if available, otherwise use extracted
1117
+ final_model = model_override or policy
1118
+ final_provider = provider_override
1119
+
1120
+ # ASSERT: Verify overrides were applied by checking the prepared config
1121
+ if job.config_overrides:
1122
+ rollout_budget_override = job.config_overrides.get("prompt_learning.gepa.rollout.budget")
1123
+ max_rollouts_override = job.config_overrides.get("prompt_learning.termination_config.max_rollouts")
1124
+
1125
+ # Assert model override matches extracted policy
1126
+ if model_override:
1127
+ assert policy == model_override, (
1128
+ f"CRITICAL: Policy model mismatch for job {job.job_id}: "
1129
+ f"override={model_override!r} but extracted={policy!r}. "
1130
+ f"This indicates the override wasn't applied correctly to the prepared config. "
1131
+ f"Config path: {prepared.path}"
1132
+ )
1133
+ logger.info(
1134
+ "✅ Config override verified for job %s: model=%s matches extracted policy",
1135
+ job.job_id,
1136
+ model_override,
1137
+ )
1138
+
1139
+ # Assert provider override if specified
1140
+ if provider_override:
1141
+ # Extract provider from prepared config
1142
+ import tomllib
1143
+ with open(prepared.path, "rb") as f:
1144
+ prepared_config = tomllib.load(f)
1145
+ pl_section = prepared_config.get("prompt_learning", {})
1146
+ policy_section = pl_section.get("policy", {})
1147
+ extracted_provider = policy_section.get("provider") if isinstance(policy_section, dict) else None
1148
+ if extracted_provider:
1149
+ assert extracted_provider == provider_override, (
1150
+ f"CRITICAL: Provider mismatch for job {job.job_id}: "
1151
+ f"override={provider_override!r} but extracted={extracted_provider!r}. "
1152
+ f"Config path: {prepared.path}"
1153
+ )
1154
+
1155
+ # Assert rollout budget override if specified
1156
+ if rollout_budget_override is not None:
1157
+ import tomllib
1158
+ with open(prepared.path, "rb") as f:
1159
+ prepared_config = tomllib.load(f)
1160
+ pl_section = prepared_config.get("prompt_learning", {})
1161
+ gepa_section = pl_section.get("gepa", {})
1162
+ rollout_section = gepa_section.get("rollout", {}) if isinstance(gepa_section, dict) else {}
1163
+ extracted_budget = rollout_section.get("budget") if isinstance(rollout_section, dict) else None
1164
+ if extracted_budget is not None:
1165
+ assert extracted_budget == rollout_budget_override, (
1166
+ f"CRITICAL: Rollout budget mismatch for job {job.job_id}: "
1167
+ f"override={rollout_budget_override} but extracted={extracted_budget}. "
1168
+ f"Config path: {prepared.path}"
1169
+ )
1170
+
1171
+ # Assert max_rollouts override if specified
1172
+ if max_rollouts_override is not None:
1173
+ import tomllib
1174
+ with open(prepared.path, "rb") as f:
1175
+ prepared_config = tomllib.load(f)
1176
+ pl_section = prepared_config.get("prompt_learning", {})
1177
+ termination_section = pl_section.get("termination_config", {})
1178
+ extracted_max_rollouts = termination_section.get("max_rollouts") if isinstance(termination_section, dict) else None
1179
+ if extracted_max_rollouts is not None:
1180
+ assert extracted_max_rollouts == max_rollouts_override, (
1181
+ f"CRITICAL: Max rollouts mismatch for job {job.job_id}: "
1182
+ f"override={max_rollouts_override} but extracted={extracted_max_rollouts}. "
1183
+ f"Config path: {prepared.path}"
1184
+ )
1185
+
1186
+ if final_model or environment:
1187
+ # Build policy string with provider if available
1188
+ policy_str = f"{final_provider}/{final_model}" if final_provider and final_model else final_model
1189
+ status_tracker.update(policy=policy_str, environment=environment)
1190
+ logger.info(
1191
+ "📊 Experiment config for job %s: policy=%s, environment=%s",
1192
+ job.job_id,
1193
+ policy or "unknown",
1194
+ environment or "unknown",
1195
+ )
1196
+
1197
+ cmd = _build_train_command(str(prepared.path))
1198
+ assert isinstance(cmd, list), (
1199
+ f"_build_train_command must return list, got {type(cmd).__name__}"
1200
+ )
1201
+ # Store cmd for execution logging (needed at end of function)
1202
+ assert len(cmd) > 0, "Command list cannot be empty"
1203
+ assert all(isinstance(arg, str) for arg in cmd), (
1204
+ f"All command arguments must be str, got types: {[type(arg).__name__ for arg in cmd]}"
1205
+ )
1206
+ logger.info("Executing job %s via command: %s", job.job_id, " ".join(cmd))
1207
+
1208
+ # Run command with unbuffered output to see errors immediately
1209
+ env = os.environ.copy()
1210
+ assert isinstance(env, dict), (
1211
+ f"os.environ.copy() must return dict, got {type(env).__name__}"
1212
+ )
1213
+ env["PYTHONUNBUFFERED"] = "1"
1214
+
1215
+ # Log authentication status BEFORE running command
1216
+ synth_key = env.get("SYNTH_API_KEY")
1217
+ env_key = env.get("ENVIRONMENT_API_KEY")
1218
+ logger.info(
1219
+ "🔐 Authentication status for job %s:\n"
1220
+ " SYNTH_API_KEY: %s\n"
1221
+ " ENVIRONMENT_API_KEY: %s",
1222
+ job.job_id,
1223
+ f"{synth_key[:8]}...{synth_key[-4:]}" if synth_key and len(synth_key) > 12 else "(NOT SET)",
1224
+ f"{env_key[:8]}...{env_key[-4:]}" if env_key and len(env_key) > 12 else "(NOT SET)",
1225
+ )
1226
+
1227
+ logger.info(
1228
+ "🚀 Starting subprocess for job %s:\n"
1229
+ " Command: %s\n"
1230
+ " Working directory: %s\n"
1231
+ " Python executable: %s\n"
1232
+ " Environment keys: %s",
1233
+ job.job_id,
1234
+ " ".join(cmd),
1235
+ os.getcwd(),
1236
+ env.get("PYTHON", "python"),
1237
+ ", ".join(sorted([k for k in env if "API" in k or "KEY" in k])),
1238
+ )
1239
+
1240
+ # Get backend URL and API key for progress polling
1241
+ config = load_config()
1242
+ assert config is not None, "load_config() returned None"
1243
+ backend_url = config.backend_url
1244
+ assert isinstance(backend_url, str), (
1245
+ f"config.backend_url must be str, got {type(backend_url).__name__}"
1246
+ )
1247
+ assert backend_url.startswith(("http://", "https://")), (
1248
+ f"backend_url must start with http:// or https://, got {backend_url}"
1249
+ )
1250
+
1251
+ # Get API key from .env file - fail loudly if not found
1252
+ # This is needed for the poller thread, which runs in the worker process
1253
+ try:
1254
+ api_key = _load_synth_api_key()
1255
+ except RuntimeError as e:
1256
+ logger.error(str(e))
1257
+ raise
1258
+
1259
+ # Set up background progress poller state (the poller thread is started once we have backend_job_id)
1260
+ poller_stop = threading.Event()
1261
+ assert poller_stop is not None, "threading.Event() returned None"
1262
+ poller_thread: threading.Thread | None = None
1263
+ backend_job_id: str | None = None
1264
+
1265
+ try:
1266
+ # Stream subprocess output line-by-line to extract backend_job_id and parse progress
1267
+ process = subprocess.Popen(
1268
+ cmd,
1269
+ stdout=subprocess.PIPE,
1270
+ stderr=subprocess.STDOUT,
1271
+ text=True,
1272
+ env=env,
1273
+ bufsize=1, # Line buffered
1274
+ )
1275
+ assert process is not None, "subprocess.Popen() returned None"
1276
+ assert process.stdout is not None, "process.stdout is None"
1277
+
1278
+ stdout_lines: list[str] = []
1279
+ accumulated_output = "" # Accumulate output for better pattern matching
1280
+ last_status_update_time = job_start_time
1281
+ status_update_interval = 5.0 # Update status_json every 5 seconds even without progress
1282
+ assert status_update_interval > 0, (
1283
+ f"status_update_interval must be > 0, got {status_update_interval}"
1284
+ )
1285
+
1286
+ # Read output line-by-line, catching pipe/OS read errors (no read timeout is applied here)
1287
+ # If subprocess crashes immediately, we need to ensure we capture the error
1288
+ try:
1289
+ # Read output line-by-line
1290
+ for line in process.stdout:
1291
+ assert isinstance(line, str), (
1292
+ f"process.stdout line must be str, got {type(line).__name__}"
1293
+ )
1294
+ stdout_lines.append(line)
1295
+ assert isinstance(accumulated_output, str), (
1296
+ f"accumulated_output must be str, got {type(accumulated_output).__name__}"
1297
+ )
1298
+ accumulated_output += line
1299
+ assert len(accumulated_output) >= len(line), (
1300
+ f"accumulated_output length should increase, got {len(accumulated_output)} < {len(line)}"
1301
+ )
1302
+
1303
+ # Try to extract backend_job_id from output
1304
+ if not backend_job_id:
1305
+ extracted_id = _extract_backend_job_id(line)
1306
+ if extracted_id:
1307
+ # Assert extracted ID is valid before using it
1308
+ assert extracted_id.startswith("pl_"), (
1309
+ f"Invalid backend_job_id format: {extracted_id}"
1310
+ )
1311
+ assert len(extracted_id) > 3, (
1312
+ f"Backend job ID too short: {extracted_id}"
1313
+ )
1314
+
1315
+ backend_job_id = extracted_id
1316
+ logger.info("📋 Extracted backend job ID: %s", backend_job_id)
1317
+
1318
+ # ✅ ADD: Store backend_job_id in status_json for debugging
1319
+ status_tracker.update(custom_fields={"backend_job_id": backend_job_id})
1320
+ logger.info("📋 Stored backend_job_id in status_json for job %s", job.job_id)
1321
+
1322
+ # Update job with backend_job_id
1323
+ with session_scope() as session:
1324
+ db_job = session.get(ExperimentJob, job.job_id)
1325
+ if db_job:
1326
+ db_job.backend_job_id = backend_job_id
1327
+ session.commit()
1328
+
1329
+ # Start progress poller now that we have backend_job_id
1330
+ # API key should already be loaded and validated above
1331
+ if not api_key:
1332
+ raise RuntimeError(
1333
+ f"❌ SYNTH_API_KEY not available for job {job.job_id}. "
1334
+ "This should have been caught earlier - API key loading failed."
1335
+ )
1336
+ elif not backend_url:
1337
+ logger.warning(
1338
+ "⚠️ Cannot start progress poller for job %s: backend_url not configured. "
1339
+ "Progress updates will not be available, but job will continue.",
1340
+ job.job_id,
1341
+ )
1342
+ elif backend_job_id and not backend_job_id.startswith("pl_"):
1343
+ logger.warning(
1344
+ "⚠️ Cannot start progress poller for job %s: invalid backend_job_id format: %s. "
1345
+ "Progress updates will not be available, but job will continue.",
1346
+ job.job_id,
1347
+ backend_job_id,
1348
+ )
1349
+
1350
+ if api_key and backend_url and backend_job_id and backend_job_id.startswith("pl_"):
1351
+ # Validate all inputs before starting thread
1352
+ assert isinstance(backend_job_id, str), (
1353
+ f"backend_job_id must be str, got {type(backend_job_id).__name__}"
1354
+ )
1355
+ assert isinstance(status_tracker, ExperimentStatusTracker), (
1356
+ f"status_tracker must be ExperimentStatusTracker, got {type(status_tracker).__name__}"
1357
+ )
1358
+ assert isinstance(backend_url, str), (
1359
+ f"backend_url must be str, got {type(backend_url).__name__}"
1360
+ )
1361
+ assert isinstance(api_key, str), (
1362
+ f"api_key must be str, got {type(api_key).__name__}"
1363
+ )
1364
+ assert poller_stop is not None, "poller_stop cannot be None"
1365
+
1366
+ poller_thread = threading.Thread(
1367
+ target=_poll_backend_progress,
1368
+ args=(
1369
+ backend_job_id,
1370
+ status_tracker,
1371
+ policy,
1372
+ environment,
1373
+ backend_url,
1374
+ api_key,
1375
+ poller_stop,
1376
+ job_start_time, # Pass job start time for rollouts/min calculation
1377
+ ),
1378
+ daemon=True,
1379
+ )
1380
+ assert poller_thread is not None, "threading.Thread() returned None"
1381
+ poller_thread.start()
1382
+ assert poller_thread.is_alive() or not poller_thread.is_alive(), (
1383
+ "Thread should be startable"
1384
+ )
1385
+ logger.info("📡 Started progress poller for backend job %s", backend_job_id)
1386
+ else:
1387
+ logger.warning(
1388
+ "Cannot start progress poller: missing API key or backend URL"
1389
+ )
1390
+
1391
+ # Parse accumulated output for progress updates (fallback if API polling fails)
1392
+ # Use accumulated output (not just current line) for better pattern matching
1393
+ # Update status_json periodically even without progress data to show elapsed time
1394
+ current_time = time.time()
1395
+ assert current_time >= job_start_time, (
1396
+ f"current_time ({current_time}) < job_start_time ({job_start_time})"
1397
+ )
1398
+ assert isinstance(accumulated_output, str), (
1399
+ f"accumulated_output must be str, got {type(accumulated_output).__name__}"
1400
+ )
1401
+
1402
+ should_update = (
1403
+ # Update if we find progress patterns
1404
+ "rollouts=" in line.lower() or
1405
+ "progress:" in line.lower() or
1406
+ "gepa progress:" in line.lower() or
1407
+ # Or update periodically (every 5 seconds) to show elapsed time
1408
+ (current_time - last_status_update_time) >= status_update_interval
1409
+ )
1410
+ assert isinstance(should_update, bool), (
1411
+ f"should_update must be bool, got {type(should_update).__name__}"
1412
+ )
1413
+
1414
+ if should_update:
1415
+ # Validate accumulated_output before parsing
1416
+ assert len(accumulated_output) > 0, "accumulated_output cannot be empty"
1417
+ output_to_parse = accumulated_output[-5000:] # Last 5KB to avoid parsing huge outputs
1418
+ assert isinstance(output_to_parse, str), (
1419
+ f"output_to_parse must be str, got {type(output_to_parse).__name__}"
1420
+ )
1421
+ assert len(output_to_parse) <= len(accumulated_output), (
1422
+ f"output_to_parse length ({len(output_to_parse)}) > accumulated_output length ({len(accumulated_output)})"
1423
+ )
1424
+
1425
+ update_status_from_output(
1426
+ status_tracker,
1427
+ output_to_parse,
1428
+ policy=policy,
1429
+ environment=environment,
1430
+ start_time=job_start_time,
1431
+ )
1432
+ last_status_update_time = current_time
1433
+ assert last_status_update_time >= job_start_time, (
1434
+ f"last_status_update_time ({last_status_update_time}) < job_start_time ({job_start_time})"
1435
+ )
1436
+ except (BrokenPipeError, OSError) as e:
1437
+ # Subprocess may have crashed - log and continue to wait() to get returncode
1438
+ logger.warning(
1439
+ "Error reading subprocess stdout for job %s (process may have crashed): %s",
1440
+ job.job_id,
1441
+ e,
1442
+ )
1443
+ # Continue to process.wait() to get the returncode and any buffered output
1444
+
1445
+ # Wait for process to complete (ALWAYS wait, even if stdout reading failed)
1446
+ assert process is not None, "process is None before wait()"
1447
+ returncode = process.wait()
1448
+
1449
+ # If stdout reading failed but process exited, try to read any remaining buffered output
1450
+ if process.stdout and not stdout_lines:
1451
+ try:
1452
+ remaining_output = process.stdout.read()
1453
+ if remaining_output:
1454
+ stdout_lines.append(remaining_output)
1455
+ accumulated_output += remaining_output
1456
+ logger.info(
1457
+ "Captured remaining subprocess output for job %s after process exit: %d bytes",
1458
+ job.job_id,
1459
+ len(remaining_output),
1460
+ )
1461
+ except Exception as e:
1462
+ logger.warning(
1463
+ "Failed to read remaining subprocess output for job %s: %s",
1464
+ job.job_id,
1465
+ e,
1466
+ )
1467
+ assert isinstance(returncode, int), (
1468
+ f"process.wait() must return int, got {type(returncode).__name__}: {returncode}"
1469
+ )
1470
+
1471
+ # Combine output
1472
+ assert isinstance(stdout_lines, list), (
1473
+ f"stdout_lines must be list, got {type(stdout_lines).__name__}"
1474
+ )
1475
+ assert all(isinstance(line, str) for line in stdout_lines), (
1476
+ f"All stdout_lines must be str, got types: {[type(line).__name__ for line in stdout_lines[:5]]}"
1477
+ )
1478
+
1479
+ stdout = "".join(stdout_lines)
1480
+ assert isinstance(stdout, str), (
1481
+ f"stdout must be str, got {type(stdout).__name__}"
1482
+ )
1483
+ stderr = "" # stderr is redirected to stdout
1484
+ assert isinstance(stderr, str), (
1485
+ f"stderr must be str, got {type(stderr).__name__}"
1486
+ )
1487
+
1488
+ # CRITICAL: If subprocess failed but we have no output, log an error
1489
+ # This indicates the subprocess crashed before producing any output
1490
+ if returncode != 0 and not stdout:
1491
+ logger.error(
1492
+ "❌ Subprocess for job %s exited with code %d but produced NO output. "
1493
+ "This usually indicates an immediate crash (import error, syntax error, etc.). "
1494
+ "Command: %s",
1495
+ job.job_id,
1496
+ returncode,
1497
+ " ".join(cmd),
1498
+ )
1499
+ # Set a helpful error message
1500
+ stdout = (
1501
+ f"[ERROR] Subprocess crashed immediately with exit code {returncode}. "
1502
+ f"No output captured. This usually indicates:\n"
1503
+ f" 1. Import error (missing module)\n"
1504
+ f" 2. Syntax error in Python code\n"
1505
+ f" 3. Missing executable or PATH issue\n"
1506
+ f" 4. Permission error\n"
1507
+ f"\nCommand: {' '.join(cmd)}\n"
1508
+ f"Working directory: {os.getcwd()}\n"
1509
+ f"Python: {env.get('PYTHON', 'python')}"
1510
+ )
1511
+
1512
+ # Create CompletedProcess-like object for compatibility
1513
+ class CompletedProcess:
1514
+ def __init__(self, returncode: int, stdout: str, stderr: str):
1515
+ assert isinstance(returncode, int), (
1516
+ f"returncode must be int, got {type(returncode).__name__}"
1517
+ )
1518
+ assert isinstance(stdout, str), (
1519
+ f"stdout must be str, got {type(stdout).__name__}"
1520
+ )
1521
+ assert isinstance(stderr, str), (
1522
+ f"stderr must be str, got {type(stderr).__name__}"
1523
+ )
1524
+ self.returncode = returncode
1525
+ self.stdout = stdout
1526
+ self.stderr = stderr
1527
+
1528
+ completed = CompletedProcess(returncode, stdout, stderr)
1529
+ assert isinstance(completed, CompletedProcess), (
1530
+ f"CompletedProcess() must return CompletedProcess, got {type(completed).__name__}"
1531
+ )
1532
+
1533
+ logger.info(
1534
+ "✅ Subprocess completed for job %s:\n"
1535
+ " Return code: %s\n"
1536
+ " Stdout length: %d chars\n"
1537
+ " Stderr length: %d chars",
1538
+ job.job_id,
1539
+ completed.returncode,
1540
+ len(completed.stdout) if completed.stdout else 0,
1541
+ len(completed.stderr) if completed.stderr else 0,
1542
+ )
1543
+
1544
+ # Final status update from complete output
1545
+ assert isinstance(completed.stdout, str), (
1546
+ f"completed.stdout must be str before final update, got {type(completed.stdout).__name__}"
1547
+ )
1548
+ assert len(completed.stdout) > 0 or len(accumulated_output) > 0, (
1549
+ "Must have some output for final status update"
1550
+ )
1551
+
1552
+ # Use accumulated_output if available (more complete), otherwise stdout
1553
+ final_output = accumulated_output if accumulated_output else completed.stdout
1554
+ assert isinstance(final_output, str), (
1555
+ f"final_output must be str, got {type(final_output).__name__}"
1556
+ )
1557
+
1558
+ update_status_from_output(
1559
+ status_tracker,
1560
+ final_output,
1561
+ policy=policy,
1562
+ environment=environment,
1563
+ start_time=job_start_time,
1564
+ )
1565
+ except subprocess.TimeoutExpired as e:
1566
+ logger.error("⏱️ Subprocess TIMEOUT for job %s after %s seconds", job.job_id, e.timeout)
1567
+ raise
1568
+ except Exception as e:
1569
+ logger.error(
1570
+ "❌ Subprocess EXCEPTION for job %s:\n"
1571
+ " Type: %s\n"
1572
+ " Message: %s",
1573
+ job.job_id,
1574
+ type(e).__name__,
1575
+ str(e),
1576
+ exc_info=True,
1577
+ )
1578
+ raise
1579
+ finally:
1580
+ # Stop progress poller
1581
+ if poller_thread and poller_thread.is_alive():
1582
+ poller_stop.set()
1583
+ poller_thread.join(timeout=5)
1584
+ logger.info("📡 Stopped progress poller for job %s", job.job_id)
1585
+
1586
+ # Log full output for debugging - prioritize auth errors
1587
+ logger.info("Training command returncode: %s", completed.returncode)
1588
+
1589
+ # Check for critical errors FIRST - these should cause failure even if returncode is 0
1590
+ stdout_lower = (completed.stdout or "").lower()
1591
+ stderr_lower = (completed.stderr or "").lower()
1592
+ combined_output = (completed.stdout or "") + "\n" + (completed.stderr or "")
1593
+ combined_lower = combined_output.lower()
1594
+
1595
+ # Check for health check failures (common cause of silent failures)
1596
+ health_check_failures = []
1597
+ health_check_details = []
1598
+ if "health check failed" in combined_lower or "aborting due to failing health check" in combined_lower:
1599
+ # Extract full context around health check failure - look for error patterns
1600
+ for source_name, source_text in [("STDOUT", completed.stdout), ("STDERR", completed.stderr)]:
1601
+ if not source_text:
1602
+ continue
1603
+ source_lower = source_text.lower()
1604
+ if "health check" in source_lower:
1605
+ # Find health check failure message
1606
+ idx = source_lower.find("health check")
1607
+ start = max(0, idx - 200)
1608
+ end = min(len(source_text), idx + 500)
1609
+ health_check_failures.append(f"{source_name} (health check context):\n{source_text[start:end]}")
1610
+
1611
+ # Also look for error patterns that might explain WHY it failed
1612
+ # Look for HTTP status codes, error messages, exceptions
1613
+ if "500" in source_text or "internal server error" in source_lower:
1614
+ # Find the 500 error context
1615
+ error_idx = source_lower.find("500") if "500" in source_text else source_lower.find("internal server error")
1616
+ if error_idx >= 0:
1617
+ error_start = max(0, error_idx - 100)
1618
+ error_end = min(len(source_text), error_idx + 800)
1619
+ health_check_details.append(f"{source_name} (500 error details):\n{source_text[error_start:error_end]}")
1620
+
1621
+ # Look for tracebacks or exception messages
1622
+ if "traceback" in source_lower or "exception" in source_lower or "error:" in source_lower:
1623
+ # Find traceback/exception
1624
+ tb_idx = source_lower.find("traceback") if "traceback" in source_lower else (
1625
+ source_lower.find("exception") if "exception" in source_lower else source_lower.find("error:")
1626
+ )
1627
+ if tb_idx >= 0:
1628
+ tb_start = max(0, tb_idx - 50)
1629
+ tb_end = min(len(source_text), tb_idx + 1500) # Get more context for tracebacks
1630
+ health_check_details.append(f"{source_name} (exception/traceback):\n{source_text[tb_start:tb_end]}")
1631
+
1632
+ # Look for specific error messages like "ModuleNotFoundError", "RuntimeError", etc.
1633
+ error_patterns = [
1634
+ r"(ModuleNotFoundError|ImportError|RuntimeError|ValueError|KeyError|AttributeError)[^\n]*",
1635
+ r"Failed to [^\n]+",
1636
+ r"Unable to [^\n]+",
1637
+ r"Missing [^\n]+",
1638
+ ]
1639
+ for pattern in error_patterns:
1640
+ matches = re.finditer(pattern, source_text, re.IGNORECASE | re.MULTILINE)
1641
+ for match in matches:
1642
+ match_start = max(0, match.start() - 100)
1643
+ match_end = min(len(source_text), match.end() + 300)
1644
+ health_check_details.append(f"{source_name} (error pattern '{pattern[:30]}...'):\n{source_text[match_start:match_end]}")
1645
+
1646
+ if health_check_failures:
1647
+ success = False
1648
+ # Build informative error message
1649
+ error_parts = [
1650
+ "Training command failed health check. Task app endpoint returned error.",
1651
+ ]
1652
+ if health_check_details:
1653
+ error_parts.append("See details below for root cause.")
1654
+ else:
1655
+ error_parts.append("Check task app logs and ensure /task_info endpoint is working.")
1656
+
1657
+ error_message = " ".join(error_parts)
1658
+
1659
+ logger.error(
1660
+ "🚨 HEALTH CHECK FAILURE for job %s:\n%s",
1661
+ job.job_id,
1662
+ "\n".join(health_check_failures),
1663
+ )
1664
+
1665
+ if health_check_details:
1666
+ logger.error(
1667
+ "🔍 ROOT CAUSE ANALYSIS for job %s:\n%s",
1668
+ job.job_id,
1669
+ "\n" + "="*80 + "\n".join(health_check_details) + "\n" + "="*80,
1670
+ )
1671
+
1672
+ # Check for authentication-related errors
1673
+ auth_keywords = [
1674
+ "authentication",
1675
+ "authorization",
1676
+ "api key",
1677
+ "api_key",
1678
+ "missing api",
1679
+ "invalid api",
1680
+ "unauthorized",
1681
+ "forbidden",
1682
+ "401",
1683
+ "403",
1684
+ "missing",
1685
+ "not set",
1686
+ "required",
1687
+ ]
1688
+
1689
+ auth_errors = []
1690
+ for keyword in auth_keywords:
1691
+ if keyword in stdout_lower:
1692
+ # Extract context around the keyword
1693
+ idx = stdout_lower.find(keyword)
1694
+ start = max(0, idx - 100)
1695
+ end = min(len(completed.stdout), idx + 200)
1696
+ auth_errors.append(f"STDOUT: ...{completed.stdout[start:end]}...")
1697
+ if keyword in stderr_lower:
1698
+ idx = stderr_lower.find(keyword)
1699
+ start = max(0, idx - 100)
1700
+ end = min(len(completed.stderr), idx + 200)
1701
+ auth_errors.append(f"STDERR: ...{completed.stderr[start:end]}...")
1702
+
1703
+ if auth_errors:
1704
+ logger.error(
1705
+ "🚨 AUTHENTICATION ERRORS DETECTED for job %s:\n%s",
1706
+ job.job_id,
1707
+ "\n".join(auth_errors),
1708
+ )
1709
+
1710
+ # Log full output (especially important for errors)
1711
+ if completed.stdout:
1712
+ if not success:
1713
+ # For errors, log full output
1714
+ logger.error("Training command stdout (FULL, %d chars):\n%s", len(completed.stdout), completed.stdout)
1715
+ else:
1716
+ # For success, log last 2000 chars
1717
+ logger.info("Training command stdout (last 2000 chars):\n%s", completed.stdout[-2000:])
1718
+ else:
1719
+ logger.warning("Training command stdout is EMPTY - command may have exited before producing output")
1720
+
1721
+ if completed.stderr:
1722
+ if not success:
1723
+ # For errors, log full output
1724
+ logger.error("Training command stderr (FULL, %d chars):\n%s", len(completed.stderr), completed.stderr)
1725
+ else:
1726
+ # For success, log last 2000 chars
1727
+ logger.warning("Training command stderr (last 2000 chars):\n%s", completed.stderr[-2000:])
1728
+ else:
1729
+ logger.info("Training command stderr is empty")
1730
+ # Validate inputs before collecting results
1731
+ assert prepared is not None, "prepared cannot be None"
1732
+ assert isinstance(prepared, PreparedConfig), (
1733
+ f"prepared must be PreparedConfig, got {type(prepared).__name__}"
1734
+ )
1735
+ assert isinstance(prepared.results_folder, Path), (
1736
+ f"prepared.results_folder must be Path, got {type(prepared.results_folder).__name__}"
1737
+ )
1738
+ assert isinstance(completed.stdout, str), (
1739
+ f"completed.stdout must be str, got {type(completed.stdout).__name__}"
1740
+ )
1741
+ assert isinstance(completed.stderr, str), (
1742
+ f"completed.stderr must be str, got {type(completed.stderr).__name__}"
1743
+ )
1744
+
1745
+ artifact_summary = collect_result_summary(
1746
+ prepared.results_folder,
1747
+ stdout=completed.stdout,
1748
+ stderr=completed.stderr,
1749
+ )
1750
+ assert isinstance(artifact_summary, ResultSummary), (
1751
+ f"collect_result_summary must return ResultSummary, got {type(artifact_summary).__name__}"
1752
+ )
1753
+
1754
+ artifact_summary.stdout = _truncate(completed.stdout)
1755
+ assert isinstance(artifact_summary.stdout, str), (
1756
+ f"artifact_summary.stdout must be str after truncate, got {type(artifact_summary.stdout).__name__}"
1757
+ )
1758
+ artifact_summary.stderr = _truncate(completed.stderr)
1759
+ assert isinstance(artifact_summary.stderr, str), (
1760
+ f"artifact_summary.stderr must be str after truncate, got {type(artifact_summary.stderr).__name__}"
1761
+ )
1762
+ artifact_summary.returncode = completed.returncode
1763
+ assert isinstance(artifact_summary.returncode, int), (
1764
+ f"artifact_summary.returncode must be int, got {type(artifact_summary.returncode).__name__}"
1765
+ )
1766
+ summary = artifact_summary
1767
+ assert isinstance(summary, ResultSummary), (
1768
+ f"summary must be ResultSummary, got {type(summary).__name__}"
1769
+ )
1770
+
1771
+ # ✅ FIX: If summary.total_rollouts is None, try to fetch from backend metadata stats
1772
+ # This handles cases where CLI output parsing fails but backend has accurate stats
1773
+ if summary.total_rollouts is None and backend_job_id:
1774
+ try:
1775
+ import requests
1776
+
1777
+ config = load_config()
1778
+ backend_url = config.backend_url
1779
+ try:
1780
+ api_key = _load_synth_api_key()
1781
+ except RuntimeError:
1782
+ api_key = None
1783
+
1784
+ if backend_url and api_key:
1785
+ url = f"{backend_url.rstrip('/')}/prompt-learning/online/jobs/{backend_job_id}"
1786
+ headers = {"Authorization": f"Bearer {api_key}"}
1787
+ resp = requests.get(url, headers=headers, timeout=10.0)
1788
+
1789
+ if resp.status_code == 200:
1790
+ backend_job = resp.json()
1791
+ backend_metadata = backend_job.get("metadata", {})
1792
+ backend_stats = backend_metadata.get("stats", {})
1793
+
1794
+ # Try to get total_rollouts from backend stats
1795
+ # Prefer total_rollouts, fallback to sum of optimization + validation rollouts
1796
+ backend_total_rollouts = backend_stats.get("total_rollouts")
1797
+ if backend_total_rollouts is None:
1798
+ opt_rollouts = backend_stats.get("optimization_rollouts_executed", 0) or 0
1799
+ val_rollouts = backend_stats.get("validation_rollouts_executed", 0) or 0
1800
+ if opt_rollouts > 0 or val_rollouts > 0:
1801
+ backend_total_rollouts = opt_rollouts + val_rollouts
1802
+
1803
+ if backend_total_rollouts is not None and backend_total_rollouts > 0:
1804
+ summary.total_rollouts = backend_total_rollouts
1805
+ logger.info(
1806
+ "✅ Extracted total_rollouts=%d from backend metadata stats for job %s (backend_job_id=%s)",
1807
+ backend_total_rollouts,
1808
+ job.job_id,
1809
+ backend_job_id,
1810
+ )
1811
+ except Exception as e:
1812
+ # Log but don't fail - backend fetch is best-effort fallback
1813
+ logger.debug(
1814
+ "Could not fetch backend stats to extract rollouts for job %s: %s",
1815
+ job.job_id,
1816
+ e,
1817
+ )
1818
+
1819
+ # Check if training actually ran - for prompt learning (GEPA/MIPRO), we expect results
1820
+ # Note: success may have been set to False above if health check failed
1821
+ if not error_message: # Only check returncode if we haven't already detected a failure
1822
+ success = completed.returncode == 0
1823
+ if success and job.job_type == "gepa":
1824
+ # GEPA should produce rollouts - that's the primary indicator of success
1825
+ # If returncode is 0 but no rollouts were produced, it failed silently
1826
+ if summary.total_rollouts is None or summary.total_rollouts == 0:
1827
+ success = False
1828
+ error_message = (
1829
+ "Training command exited with returncode 0 but produced no rollouts. "
1830
+ "This indicates GEPA did not actually run. "
1831
+ f"Check stdout/stderr for errors. "
1832
+ f"Results folder: {prepared.results_folder}"
1833
+ )
1834
+ logger.error(
1835
+ "Job %s failed silently: %s\nStdout tail:\n%s\nStderr tail:\n%s",
1836
+ job.job_id,
1837
+ error_message,
1838
+ summary.stdout[-1000:] if summary.stdout else "(empty)",
1839
+ summary.stderr[-1000:] if summary.stderr else "(empty)",
1840
+ )
1841
+ else:
1842
+ # We have rollouts - that's sufficient evidence GEPA ran successfully
1843
+ # Learning curve and stats are nice-to-have but not required
1844
+ logger.info(
1845
+ "Job %s completed successfully with %d rollouts (best_score=%s, learning_curve_points=%d, stats=%s)",
1846
+ job.job_id,
1847
+ summary.total_rollouts,
1848
+ summary.best_score,
1849
+ len(summary.learning_curve_points),
1850
+ "yes" if summary.stats else "no",
1851
+ )
1852
+
1853
+ if not success and not error_message:
1854
+ # Build detailed error message with FULL stdout/stderr
1855
+ error_parts = [f"Training command exited with {completed.returncode}"]
1856
+
1857
+ # Include FULL stdout if available (for errors, we want complete context)
1858
+ if completed.stdout:
1859
+ error_parts.append(f"\n\n{'='*80}\nSTDOUT (FULL, {len(completed.stdout)} chars):\n{'='*80}\n{completed.stdout}")
1860
+ else:
1861
+ error_parts.append("\n\nStdout: (empty - subprocess may have crashed immediately)")
1862
+
1863
+ # Include FULL stderr if available
1864
+ if completed.stderr:
1865
+ error_parts.append(f"\n\n{'='*80}\nSTDERR (FULL, {len(completed.stderr)} chars):\n{'='*80}\n{completed.stderr}")
1866
+ else:
1867
+ error_parts.append("\n\nStderr: (empty)")
1868
+
1869
+ error_message = "".join(error_parts)
1870
+
1871
+ # Log full error (truncate only for logger, but keep full in error_message)
1872
+ logger.error(
1873
+ "Job %s failed: %s\nFull stdout (%d chars):\n%s\nFull stderr (%d chars):\n%s",
1874
+ job.job_id,
1875
+ f"Training command exited with {completed.returncode}",
1876
+ len(completed.stdout) if completed.stdout else 0,
1877
+ completed.stdout if completed.stdout else "(empty)",
1878
+ len(completed.stderr) if completed.stderr else 0,
1879
+ completed.stderr if completed.stderr else "(empty)",
1880
+ )
1881
+ except Exception as exc:
1882
+ error_message = str(exc)
1883
+ summary.stderr = _truncate((summary.stderr or "") + f"\n{error_message}")
1884
+ logger.exception("Job %s encountered error: %s", job.job_id, error_message)
1885
+ finally:
1886
+ if prepared:
1887
+ prepared.cleanup()
1888
+
1889
+ # Prepare execution details for logging
1890
+ command_str = " ".join(cmd) if cmd is not None and len(cmd) > 0 else None
1891
+ working_dir = os.getcwd()
1892
+ if env is not None:
1893
+ python_exe = env.get("PYTHON", "python")
1894
+ env_keys = list(env.keys())
1895
+ else:
1896
+ python_exe = None
1897
+ env_keys = None
1898
+
1899
+ return _finalize_job(
1900
+ job.job_id,
1901
+ summary=summary,
1902
+ success=success,
1903
+ error_message=error_message,
1904
+ command=command_str,
1905
+ working_directory=working_dir,
1906
+ python_executable=python_exe,
1907
+ environment_keys=env_keys,
1908
+ )
1909
+
1910
+
1911
@celery_app.task(name="synth_ai.cli.local.experiment_queue.process_experiment_queue")
def process_experiment_queue() -> dict[str, Any]:
    """Sweep active experiments and dispatch queued jobs that have no Celery task yet.

    This is a periodic safety net (the schedule itself is configured outside
    this function; the original note states every 5 seconds via Celery Beat).
    It picks up jobs that were left undispatched, e.g. because an earlier
    dispatch attempt failed, the job was queued while others were running, or
    a worker restart missed the dispatch event.

    Returns:
        A dict with two integer entries: ``"dispatched"`` (total jobs handed
        off during this sweep) and ``"experiments_checked"`` (number of
        QUEUED/RUNNING experiments inspected).
    """
    from .config import load_config

    config = load_config()

    # Diagnostic only: report (but do not abort on) a disagreement between the
    # database path given via the environment and the one in the loaded config.
    db_path_override = os.getenv("EXPERIMENT_QUEUE_DB_PATH")
    if db_path_override:
        from pathlib import Path

        expected_db_path = Path(db_path_override).expanduser().resolve()
        if config.sqlite_path != expected_db_path:
            logger.error(
                "Database path mismatch in periodic task! ENV: %s != CONFIG: %s",
                expected_db_path,
                config.sqlite_path,
            )

    logger.debug("Processing experiment queue for queued jobs (database: %s)", config.sqlite_path)

    total_dispatched = 0
    experiments_checked = 0

    with session_scope() as session:
        # Only experiments that are queued or running can still have jobs to dispatch.
        active_states = [ExperimentStatus.QUEUED, ExperimentStatus.RUNNING]
        candidates = session.query(Experiment).filter(Experiment.status.in_(active_states)).all()

        for experiments_checked, experiment in enumerate(candidates, start=1):
            # A job still needs dispatching when it is QUEUED and has no celery_task_id.
            undispatched_criteria = (
                ExperimentJob.experiment_id == experiment.experiment_id,
                ExperimentJob.status == ExperimentJobStatus.QUEUED,
                ExperimentJob.celery_task_id.is_(None),
            )
            pending_count = session.query(ExperimentJob).filter(*undispatched_criteria).count()
            if pending_count <= 0:
                continue

            logger.debug(
                "Found %d queued jobs for experiment %s, attempting dispatch",
                pending_count,
                experiment.experiment_id,
            )
            dispatched = dispatch_available_jobs(session, experiment.experiment_id)
            total_dispatched += len(dispatched)
            if dispatched:
                logger.info(
                    "Dispatched %d jobs for experiment %s",
                    len(dispatched),
                    experiment.experiment_id,
                )

    result = {
        "dispatched": total_dispatched,
        "experiments_checked": experiments_checked,
    }
    logger.debug("Queue check completed: %s", result)
    return result