synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (299) hide show
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/baseline/banking77_baseline.py +204 -0
  3. examples/baseline/crafter_baseline.py +407 -0
  4. examples/baseline/pokemon_red_baseline.py +326 -0
  5. examples/baseline/simple_baseline.py +56 -0
  6. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  7. examples/blog_posts/gepa/README.md +355 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  9. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  10. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  13. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  15. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  16. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  18. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  19. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  20. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  21. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  22. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  23. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  24. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  25. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  26. examples/blog_posts/gepa/task_apps.py +105 -0
  27. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  28. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  29. examples/blog_posts/pokemon_vl/README.md +98 -0
  30. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  31. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  32. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  33. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  34. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  35. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  36. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  37. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  38. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  39. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  40. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  41. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  42. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  43. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  44. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  45. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  46. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  47. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  48. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  49. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  50. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  51. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  52. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  53. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  54. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  55. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  56. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  57. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  58. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  59. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  60. examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
  61. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  62. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
  63. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
  64. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  65. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  66. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  67. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  68. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  69. examples/qwen_vl/README.md +10 -12
  70. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  71. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  72. examples/qwen_vl/collect_data_via_cli.md +76 -84
  73. examples/qwen_vl/collect_vision_traces.py +4 -4
  74. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  75. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  76. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  77. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  78. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  79. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  80. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  81. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  82. examples/qwen_vl/run_vision_comparison.sh +6 -7
  83. examples/rl/README.md +5 -5
  84. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  85. examples/rl/configs/rl_from_base_qwen17.toml +6 -2
  86. examples/rl/task_app/README.md +1 -2
  87. examples/rl/task_app/math_single_step.py +2 -2
  88. examples/run_crafter_demo.sh +2 -2
  89. examples/sft/README.md +1 -1
  90. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  91. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  92. examples/swe/task_app/README.md +32 -2
  93. examples/swe/task_app/grpo_swe_mini.py +4 -0
  94. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  95. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  96. examples/swe/task_app/hosted/inference/openai_client.py +4 -38
  97. examples/swe/task_app/hosted/policy_routes.py +17 -0
  98. examples/swe/task_app/hosted/rollout.py +4 -2
  99. examples/swe/task_app/morph_backend.py +178 -0
  100. examples/task_apps/banking77/__init__.py +6 -0
  101. examples/task_apps/banking77/banking77_task_app.py +841 -0
  102. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  103. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  104. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  105. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  106. examples/task_apps/crafter/task_app/README.md +1 -1
  107. examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
  108. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  109. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  110. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  111. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  112. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
  113. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
  114. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
  115. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  116. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  117. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  118. examples/task_apps/gepa_benchmarks/common.py +260 -0
  119. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  120. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  121. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  122. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  123. examples/task_apps/math/README.md +1 -2
  124. examples/task_apps/pokemon_red/README.md +3 -4
  125. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  126. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  127. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  128. examples/task_apps/pokemon_red/task_app.py +288 -39
  129. examples/task_apps/sokoban/README.md +2 -3
  130. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  131. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  132. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  133. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  134. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  135. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
  136. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  137. examples/warming_up_to_rl/task_app/README.md +1 -1
  138. examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
  139. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  147. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  148. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
  149. synth_ai/api/train/builders.py +99 -4
  150. synth_ai/api/train/cli.py +516 -26
  151. synth_ai/api/train/config_finder.py +13 -2
  152. synth_ai/api/train/configs/__init__.py +23 -2
  153. synth_ai/api/train/configs/prompt_learning.py +442 -0
  154. synth_ai/api/train/configs/rl.py +61 -7
  155. synth_ai/api/train/configs/sft.py +6 -2
  156. synth_ai/api/train/configs/shared.py +59 -2
  157. synth_ai/api/train/task_app.py +1 -1
  158. synth_ai/api/train/validators.py +277 -0
  159. synth_ai/auth/credentials.py +119 -0
  160. synth_ai/baseline/__init__.py +25 -0
  161. synth_ai/baseline/config.py +209 -0
  162. synth_ai/baseline/discovery.py +214 -0
  163. synth_ai/baseline/execution.py +146 -0
  164. synth_ai/cli/__init__.py +94 -18
  165. synth_ai/cli/__main__.py +0 -0
  166. synth_ai/cli/claude.py +70 -0
  167. synth_ai/cli/codex.py +84 -0
  168. synth_ai/cli/commands/__init__.py +18 -0
  169. synth_ai/cli/commands/baseline/__init__.py +12 -0
  170. synth_ai/cli/commands/baseline/core.py +637 -0
  171. synth_ai/cli/commands/baseline/list.py +93 -0
  172. synth_ai/cli/commands/demo/__init__.py +6 -0
  173. synth_ai/cli/commands/demo/core.py +163 -0
  174. synth_ai/cli/commands/eval/__init__.py +19 -0
  175. synth_ai/cli/commands/eval/core.py +1112 -0
  176. synth_ai/cli/commands/eval/errors.py +81 -0
  177. synth_ai/cli/commands/eval/validation.py +133 -0
  178. synth_ai/cli/commands/filter/__init__.py +12 -0
  179. synth_ai/cli/commands/filter/core.py +424 -0
  180. synth_ai/cli/commands/filter/errors.py +55 -0
  181. synth_ai/cli/commands/filter/validation.py +77 -0
  182. synth_ai/cli/commands/help/__init__.py +177 -0
  183. synth_ai/cli/commands/help/core.py +72 -0
  184. synth_ai/cli/commands/smoke/__init__.py +7 -0
  185. synth_ai/cli/commands/smoke/core.py +1436 -0
  186. synth_ai/cli/commands/status/__init__.py +64 -0
  187. synth_ai/cli/commands/status/client.py +192 -0
  188. synth_ai/cli/commands/status/config.py +92 -0
  189. synth_ai/cli/commands/status/errors.py +20 -0
  190. synth_ai/cli/commands/status/formatters.py +164 -0
  191. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  192. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  193. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  194. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  195. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  196. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  197. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  198. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  199. synth_ai/cli/commands/status/utils.py +114 -0
  200. synth_ai/cli/commands/train/__init__.py +53 -0
  201. synth_ai/cli/commands/train/core.py +21 -0
  202. synth_ai/cli/commands/train/errors.py +117 -0
  203. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  204. synth_ai/cli/commands/train/judge_validation.py +305 -0
  205. synth_ai/cli/commands/train/validation.py +386 -0
  206. synth_ai/cli/demo.py +30 -158
  207. synth_ai/cli/deploy/__init__.py +43 -0
  208. synth_ai/cli/deploy.py +162 -0
  209. synth_ai/cli/eval/__init__.py +36 -0
  210. synth_ai/cli/eval/core.py +5 -0
  211. synth_ai/cli/eval/errors.py +31 -0
  212. synth_ai/cli/eval/validation.py +5 -0
  213. synth_ai/cli/filter/__init__.py +28 -0
  214. synth_ai/cli/filter/core.py +5 -0
  215. synth_ai/cli/filter/errors.py +23 -0
  216. synth_ai/cli/filter/validation.py +5 -0
  217. synth_ai/cli/legacy_root_backup.py +14 -8
  218. synth_ai/cli/modal_serve/__init__.py +12 -0
  219. synth_ai/cli/modal_serve/core.py +14 -0
  220. synth_ai/cli/modal_serve/errors.py +8 -0
  221. synth_ai/cli/modal_serve/validation.py +11 -0
  222. synth_ai/cli/opencode.py +107 -0
  223. synth_ai/cli/root.py +9 -5
  224. synth_ai/cli/serve/__init__.py +12 -0
  225. synth_ai/cli/serve/core.py +14 -0
  226. synth_ai/cli/serve/errors.py +8 -0
  227. synth_ai/cli/serve/validation.py +11 -0
  228. synth_ai/cli/setup.py +20 -265
  229. synth_ai/cli/status.py +7 -126
  230. synth_ai/cli/task_app_deploy.py +1 -10
  231. synth_ai/cli/task_app_modal_serve.py +4 -9
  232. synth_ai/cli/task_app_serve.py +4 -11
  233. synth_ai/cli/task_apps.py +51 -1480
  234. synth_ai/cli/train/__init__.py +12 -0
  235. synth_ai/cli/train/core.py +21 -0
  236. synth_ai/cli/train/errors.py +8 -0
  237. synth_ai/cli/train/validation.py +24 -0
  238. synth_ai/cli/train.py +1 -14
  239. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  240. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  241. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  242. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  243. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  244. synth_ai/environments/examples/red/engine.py +33 -12
  245. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  246. synth_ai/environments/examples/red/environment.py +26 -0
  247. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  248. synth_ai/http.py +12 -0
  249. synth_ai/judge_schemas.py +10 -10
  250. synth_ai/learning/__init__.py +10 -0
  251. synth_ai/learning/prompt_learning_client.py +276 -0
  252. synth_ai/learning/prompt_learning_types.py +184 -0
  253. synth_ai/learning/rl/client.py +3 -1
  254. synth_ai/pricing/__init__.py +2 -0
  255. synth_ai/pricing/model_pricing.py +57 -0
  256. synth_ai/streaming/__init__.py +29 -0
  257. synth_ai/streaming/config.py +94 -0
  258. synth_ai/streaming/handlers.py +518 -0
  259. synth_ai/streaming/streamer.py +320 -0
  260. synth_ai/streaming/types.py +95 -0
  261. synth_ai/task/apps/__init__.py +1 -0
  262. synth_ai/task/config.py +2 -0
  263. synth_ai/task/tracing_utils.py +25 -25
  264. synth_ai/task/validators.py +45 -9
  265. synth_ai/task_app_cfgs.py +21 -0
  266. synth_ai/tracing_v3/config.py +162 -19
  267. synth_ai/tracing_v3/constants.py +1 -1
  268. synth_ai/tracing_v3/db_config.py +24 -38
  269. synth_ai/tracing_v3/migration_helper.py +1 -2
  270. synth_ai/tracing_v3/storage/config.py +47 -13
  271. synth_ai/tracing_v3/storage/factory.py +3 -3
  272. synth_ai/tracing_v3/turso/daemon.py +113 -11
  273. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  274. synth_ai/types.py +8 -0
  275. synth_ai/urls.py +11 -0
  276. synth_ai/utils/__init__.py +30 -1
  277. synth_ai/utils/agents.py +74 -0
  278. synth_ai/utils/bin.py +39 -0
  279. synth_ai/utils/cli.py +149 -5
  280. synth_ai/utils/env.py +40 -33
  281. synth_ai/utils/http.py +4 -1
  282. synth_ai/utils/json.py +72 -0
  283. synth_ai/utils/modal.py +285 -3
  284. synth_ai/utils/paths.py +48 -0
  285. synth_ai/utils/uvicorn.py +113 -0
  286. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
  287. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
  288. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  289. synth_ai/cli/tui.py +0 -62
  290. synth_ai/tui/__init__.py +0 -5
  291. synth_ai/tui/__main__.py +0 -13
  292. synth_ai/tui/cli/__init__.py +0 -1
  293. synth_ai/tui/cli/query_experiments.py +0 -164
  294. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  295. synth_ai/tui/dashboard.py +0 -911
  296. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  297. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  298. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  299. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
@@ -466,11 +466,20 @@ async def step_policy(
466
466
 
467
467
  if tracing_context is not None:
468
468
  try:
469
+ print(
470
+ f"[TRACE_DEBUG] record_policy_prompts sys={len(system_prompt_records)} user={len(user_prompt_records)}",
471
+ flush=True,
472
+ )
469
473
  await tracing_context.record_policy_prompts(
470
474
  system_prompt_records, user_prompt_records
471
475
  )
472
476
  except Exception as exc:
473
477
  logger.debug(f"TRACING_PROMPTS_FAIL: {exc}")
478
+ else:
479
+ print(
480
+ f"[TRACE_DEBUG] Missing tracing context on policy step; policy_id={request.policy_id}",
481
+ flush=True,
482
+ )
474
483
 
475
484
  # Create inference client (choose API key by target provider)
476
485
  # Require inference_url to be set explicitly by the rollout policy config.
@@ -492,7 +501,11 @@ async def step_policy(
492
501
  if isinstance(target_url, str):
493
502
  low_url = target_url.lower()
494
503
  # Proxy endpoints should not receive a bearer; the server-side proxy holds the vendor key
495
- if "/proxy/groq" in low_url or "/proxy/openai" in low_url:
504
+ if (
505
+ "/proxy/groq" in low_url
506
+ or "/proxy/openai" in low_url
507
+ or "/proxy/v1" in low_url
508
+ ):
496
509
  api_key_override = None
497
510
  elif "openai.com" in low_url:
498
511
  api_key_override = _os.getenv("OPENAI_API_KEY") or getattr(
@@ -692,9 +705,10 @@ async def step_policy(
692
705
  "sokoban-react",
693
706
  "crafter-react",
694
707
  ) and getattr(policy, "use_tools", True):
695
- req_tools = meta["inference_request"]["tools"]
696
- req_tool_choice = meta["inference_request"]["tool_choice"]
697
- req_stop_after = meta["inference_request"]["stop_after_tool_calls"]
708
+ inf_req = meta.get("inference_request", {})
709
+ req_tools = inf_req.get("tools")
710
+ req_tool_choice = inf_req.get("tool_choice")
711
+ req_stop_after = inf_req.get("stop_after_tool_calls")
698
712
  logger.info(
699
713
  f"TOOLCALL_CONFIG: policy={policy_name} tools_present={bool(req_tools)} tool_choice={req_tool_choice} stop_after={req_stop_after}"
700
714
  )
@@ -703,6 +717,8 @@ async def step_policy(
703
717
  status_code=500,
704
718
  detail=f"TOOLCALL_ASSERTION_FAIL: Missing tools or tool_choice!=required for policy {policy_name}",
705
719
  )
720
+ if req_stop_after is None:
721
+ inf_req["stop_after_tool_calls"] = 1
706
722
 
707
723
  # Call inference service with retries for Flash cold-start (503)
708
724
  import time as _t
@@ -951,6 +967,23 @@ async def step_policy(
951
967
  except Exception as exc:
952
968
  logger.debug(f"TRACING_LLM_FAIL: {exc}")
953
969
 
970
+ if not tool_calls:
971
+ preview = ""
972
+ try:
973
+ preview = str(meta.get("raw_response") or "")[:400]
974
+ except Exception:
975
+ preview = "<unavailable>"
976
+ logger.error(
977
+ {
978
+ "rollout.policy_step": True,
979
+ "policy_id": request.policy_id,
980
+ "error": "no_tool_calls",
981
+ "inference_url": meta.get("inference_url"),
982
+ "raw_preview": preview,
983
+ }
984
+ )
985
+ raise RuntimeError("Policy step produced no tool calls; inference response unusable.")
986
+
954
987
  return PolicyStepResponse(
955
988
  tool_calls=tool_calls,
956
989
  meta=meta,
@@ -223,6 +223,7 @@ class RolloutTracingContext:
223
223
  ).lower()
224
224
  self.return_trace = bool(getattr(request.record, "return_trace", False))
225
225
  self.sft_output_dir = getattr(fastapi_request.app.state, "sft_output_dir", None)
226
+ print(f"[TRACE_DEBUG] RolloutTracingContext init: trace_format={self.trace_format} return_trace={self.return_trace}", flush=True)
226
227
  self.session_trace = None
227
228
  self.metadata_updates: dict[str, Any] = {}
228
229
  self.policy_name = request.policy.policy_name or ""
@@ -244,19 +245,24 @@ class RolloutTracingContext:
244
245
 
245
246
  async def start_session(self) -> None:
246
247
  if not self.enabled or self.tracer is None:
248
+ print("[TRACE_DEBUG] start_session skipped: tracer disabled", flush=True)
247
249
  return
248
250
  try:
249
251
  await self.tracer.initialize()
252
+ print("[TRACE_DEBUG] tracer initialized", flush=True)
250
253
  except Exception as exc:
251
254
  logger.debug("TRACING_INIT_FAIL: %s", exc)
255
+ # Hard fail: tracing requested but cannot initialize
256
+ raise
252
257
  try:
253
258
  await self.tracer.start_session(
254
259
  session_id=self.run_id, metadata=dict(self.metadata_base)
255
260
  )
261
+ print(f"[TRACE_DEBUG] start_session succeeded for run_id={self.run_id}", flush=True)
256
262
  except Exception as exc:
257
263
  logger.warning("TRACING_START_FAIL: %s", exc)
258
- self.enabled = False
259
- self.tracer = None
264
+ # Hard fail: tracing requested but cannot start session
265
+ raise
260
266
 
261
267
  async def start_decision(self, turn_number: int) -> None:
262
268
  self.current_turn = turn_number
@@ -317,6 +323,9 @@ class RolloutTracingContext:
317
323
  )
318
324
  except Exception as exc:
319
325
  logger.debug("TRACING_USER_MSG_FAIL: %s", exc)
326
+ if self.tracer and self.tracer._current_trace:
327
+ msg_count = len(self.tracer._current_trace.markov_blanket_message_history)
328
+ print(f"[TRACE_DEBUG] After record_policy_prompts: {msg_count} messages", flush=True)
320
329
 
321
330
  def _content_to_text(self, content: Any) -> str:
322
331
  if isinstance(content, str):
@@ -395,6 +404,11 @@ class RolloutTracingContext:
395
404
  message_type="policy_tool_call",
396
405
  metadata=self._message_metadata(),
397
406
  )
407
+ if self.tracer._current_trace:
408
+ print(
409
+ f"[TRACE_DEBUG] After tool invocation: messages={len(self.tracer._current_trace.markov_blanket_message_history)}",
410
+ flush=True,
411
+ )
398
412
  except Exception as exc:
399
413
  logger.debug("TRACING_TOOL_MSG_FAIL: %s", exc)
400
414
 
@@ -664,12 +678,24 @@ class RolloutTracingContext:
664
678
  except Exception as exc:
665
679
  logger.debug("TRACING_OUTCOME_FAIL: %s", exc)
666
680
  try:
681
+ if self.tracer._current_trace:
682
+ msg_count = len(self.tracer._current_trace.markov_blanket_message_history)
683
+ print(f"[TRACE_DEBUG] Before end_session: {msg_count} messages in trace", flush=True)
667
684
  self.session_trace = await self.tracer.end_session()
668
685
  if self.session_trace is not None:
669
686
  self.session_trace.metadata.update(self.metadata_updates)
687
+ print(
688
+ f"[TRACE_DEBUG] Session ended successfully, session_id={self.session_trace.session_id}",
689
+ flush=True,
690
+ )
691
+ print(
692
+ f"[TRACE_DEBUG] session_trace.metadata keys: {list(self.session_trace.metadata.keys())}",
693
+ flush=True,
694
+ )
670
695
  except Exception as exc:
671
696
  logger.debug("TRACING_END_SESSION_FAIL: %s", exc)
672
697
  self.session_trace = None
698
+ print(f"[TRACE_DEBUG] end_session failed for run_id={self.run_id}: {exc}", flush=True)
673
699
  with contextlib.suppress(Exception):
674
700
  await self.tracer.close()
675
701
 
@@ -700,9 +726,13 @@ class RolloutTracingContext:
700
726
  def build_trace_payload(self, session_trace: Any) -> dict[str, Any] | None:
701
727
  if not self.return_trace or session_trace is None:
702
728
  return None
703
- if self.trace_format == "full":
729
+ if self.trace_format in ("full", "structured"):
704
730
  payload = session_trace.to_dict()
705
731
  payload.setdefault("metadata", {}).update(self.metadata_updates)
732
+ print(
733
+ f"[TRACE_DEBUG] build_trace_payload returning structured trace with messages={len(payload.get('markov_blanket_message_history') or [])}",
734
+ flush=True,
735
+ )
706
736
  return payload
707
737
  metadata = dict(session_trace.metadata)
708
738
  metadata.update(self.metadata_updates)
@@ -1,6 +1,7 @@
1
1
  """Utility functions for the task service."""
2
2
 
3
3
  from typing import Any
4
+ from urllib.parse import urlparse, urlunparse
4
5
 
5
6
  import numpy as np
6
7
 
@@ -60,3 +61,69 @@ def sanitize_observation(observation: dict[str, Any]) -> dict[str, Any]:
60
61
  sanitized[key] = convert_numpy_to_python(value)
61
62
 
62
63
  return sanitized
64
+
65
+
66
+ _CHAT_COMPLETIONS_SUFFIX = "/v1/chat/completions"
67
+
68
+
69
+ def force_normalize_chat_completions_url(raw_url: Any) -> Any:
70
+ """
71
+ Convert ANY malformed inference URL into the correct chat-completions form.
72
+ Ensures path ends with /v1/chat/completions and that query has no '/' segments.
73
+ """
74
+ if not isinstance(raw_url, str):
75
+ return raw_url
76
+ url = raw_url.strip()
77
+ if not url:
78
+ return raw_url
79
+
80
+ parsed = urlparse(url)
81
+ path = (parsed.path or "").rstrip("/")
82
+ query = parsed.query or ""
83
+
84
+ # If query contains a path, extract and repair
85
+ if query and "/" in query:
86
+ before_slash, after_slash = query.split("/", 1)
87
+ cut_positions = [i for i in [after_slash.find("&"), after_slash.find("?")] if i >= 0]
88
+ cut = min(cut_positions) if cut_positions else len(after_slash)
89
+ path_from_query = "/" + after_slash[:cut]
90
+ extra_query = after_slash[cut + 1 :] if cut < len(after_slash) else ""
91
+ merged_query = before_slash if before_slash else ""
92
+ if extra_query:
93
+ merged_query = f"{merged_query}&{extra_query}" if merged_query else extra_query
94
+ final_path = (
95
+ path_from_query
96
+ if path_from_query.startswith(_CHAT_COMPLETIONS_SUFFIX)
97
+ else f"{path_from_query.rstrip('/')}{_CHAT_COMPLETIONS_SUFFIX}"
98
+ )
99
+ parsed = parsed._replace(path=final_path, query=merged_query)
100
+ url = urlunparse(parsed)
101
+ parsed = urlparse(url)
102
+ path = parsed.path or ""
103
+ query = parsed.query or ""
104
+
105
+ # Ensure path suffix
106
+ if not path.endswith(_CHAT_COMPLETIONS_SUFFIX):
107
+ new_path = f"{path}{_CHAT_COMPLETIONS_SUFFIX}" if path else _CHAT_COMPLETIONS_SUFFIX
108
+ parsed = parsed._replace(path=new_path)
109
+ url = urlunparse(parsed)
110
+ parsed = urlparse(url)
111
+ path = parsed.path or ""
112
+ query = parsed.query or ""
113
+
114
+ # Last-resort: strip any '/' from query
115
+ if query and "/" in query:
116
+ safe_query = query.split("/")[0]
117
+ parsed = parsed._replace(query=safe_query)
118
+ url = urlunparse(parsed)
119
+
120
+ return url
121
+
122
+
123
+ def ensure_chat_completions_url(raw_url: Any, mode: Any = None) -> Any:
124
+ """
125
+ Mode-aware normalizer (RL/EVAL) that returns a valid chat completions URL and
126
+ preserves existing query parameters.
127
+ """
128
+ # For now reuse force normalizer in both modes to guarantee correctness
129
+ return force_normalize_chat_completions_url(raw_url)
@@ -1,8 +1,15 @@
1
+ [algorithm]
2
+ type = "online"
3
+ method = "policy_gradient"
4
+ variety = "gspo"
5
+
1
6
  [services]
2
7
  task_url = "https://your-math-task.modal.run"
3
8
 
4
9
  [model]
5
10
  base = "Qwen/Qwen3-4B"
11
+ trainer_mode = "full"
12
+ label = "math-single-step-qwen3-4b"
6
13
 
7
14
  [policy]
8
15
  model = "Qwen/Qwen3-4B"
@@ -18,6 +25,8 @@ evaluation_split = "validation"
18
25
  evaluation_episodes = 256
19
26
 
20
27
  [training]
28
+ num_epochs = 1
29
+ iterations_per_epoch = 20
21
30
  max_turns = 1
22
31
  ops = ["agent", "env"]
23
32
  batch_size = 128
@@ -31,5 +40,23 @@ learning_rate = 5e-6
31
40
  gpu_type = "A10G"
32
41
  gpu_count = 4
33
42
 
43
+ [topology]
44
+ type = "single_node_split"
45
+ gpus_for_vllm = 2
46
+ gpus_for_training = 2
47
+ gpus_for_ref = 0
48
+ tensor_parallel = 1
49
+
50
+ [rollout]
51
+ env_name = "math"
52
+ policy_name = "math-single-step"
53
+ max_turns = 1
54
+ episodes_per_batch = 256
55
+
56
+ [evaluation]
57
+ instances = 256
58
+ every_n_iters = 10
59
+ seeds = [0, 1, 2, 3, 4]
60
+
34
61
  [tags]
35
62
  experiment = "math_single_step"
@@ -8,6 +8,8 @@ task_url = "http://localhost:8101"
8
8
 
9
9
  [model]
10
10
  base = "Qwen/Qwen3-1.7B"
11
+ trainer_mode = "full"
12
+ label = "math-single-step-qwen3-1.7b"
11
13
 
12
14
  [policy]
13
15
  model = "Qwen/Qwen3-1.7B"
@@ -23,6 +25,8 @@ evaluation_split = "validation"
23
25
  evaluation_episodes = 50
24
26
 
25
27
  [training]
28
+ num_epochs = 1
29
+ iterations_per_epoch = 20
26
30
  max_turns = 1
27
31
  ops = ["agent", "env"]
28
32
  batch_size = 2
@@ -61,9 +65,11 @@ health_max_wait_s = 180
61
65
  health_interval_ms = 300
62
66
 
63
67
  [rollout]
68
+ env_name = "math"
64
69
  policy_name = "math-single-step"
65
70
  max_turns = 1
66
71
  episodes_per_batch = 32 # group_size * batch_size
72
+ task_app_origin_rewards_only = true
67
73
 
68
74
  [evaluation]
69
75
  instances = 32
@@ -33,7 +33,7 @@ try:
33
33
  except Exception as exc: # pragma: no cover - critical dependency
34
34
  raise RuntimeError("Unable to load SFT payload helpers") from exc
35
35
 
36
- from .configs import RLConfig, SFTConfig
36
+ from .configs import PromptLearningConfig, RLConfig, SFTConfig
37
37
  from .supported_algos import (
38
38
  AlgorithmValidationError,
39
39
  ensure_model_supported_for_algorithm,
@@ -56,6 +56,12 @@ class SFTBuildResult:
56
56
  validation_file: Path | None
57
57
 
58
58
 
59
+ @dataclass(slots=True)
60
+ class PromptLearningBuildResult:
61
+ payload: dict[str, Any]
62
+ task_url: str
63
+
64
+
59
65
  def _format_validation_error(path: Path, exc: ValidationError) -> str:
60
66
  lines: list[str] = []
61
67
  for error in exc.errors():
@@ -74,12 +80,23 @@ def build_rl_payload(
74
80
  idempotency: str | None,
75
81
  allow_experimental: bool | None = None,
76
82
  ) -> RLBuildResult:
83
+ # Load and validate config with SDK-level checks
84
+ from synth_ai.api.train.utils import load_toml
85
+ from synth_ai.cli.commands.train.validation import validate_rl_config
86
+
77
87
  try:
78
- rl_cfg = RLConfig.from_path(config_path)
88
+ raw_config = load_toml(config_path)
89
+ validated_config = validate_rl_config(raw_config) # Adds defaults & validates
90
+ rl_cfg = RLConfig.from_mapping(validated_config)
79
91
  except ValidationError as exc:
80
92
  raise click.ClickException(_format_validation_error(config_path, exc)) from exc
81
93
 
82
94
  data = rl_cfg.to_dict()
95
+
96
+ # Remove smoke section - it's CLI-only and should not be sent to the trainer
97
+ if "smoke" in data:
98
+ del data["smoke"]
99
+
83
100
  # Ensure required [reference] section for backend validators
84
101
  try:
85
102
  ref_cfg = data.get("reference") if isinstance(data, dict) else None
@@ -110,8 +127,8 @@ def build_rl_payload(
110
127
  "Task app URL required (provide --task-url or set services.task_url in TOML)"
111
128
  )
112
129
 
113
- model_source = (model_cfg.source or "").strip()
114
- model_base = (model_cfg.base or "").strip()
130
+ model_source = (model_cfg.source or "").strip() if model_cfg else ""
131
+ model_base = (model_cfg.base or "").strip() if model_cfg else ""
115
132
  override_model = (overrides.get("model") or "").strip()
116
133
  if override_model:
117
134
  model_source = override_model
@@ -343,9 +360,87 @@ def build_sft_payload(
343
360
  return SFTBuildResult(payload=payload, train_file=dataset_path, validation_file=validation_file)
344
361
 
345
362
 
363
+ def build_prompt_learning_payload(
364
+ *,
365
+ config_path: Path,
366
+ task_url: str | None,
367
+ overrides: dict[str, Any],
368
+ allow_experimental: bool | None = None,
369
+ ) -> PromptLearningBuildResult:
370
+ """Build payload for prompt learning job (MIPRO or GEPA)."""
371
+ import os
372
+
373
+ from pydantic import ValidationError
374
+
375
+ from .configs.prompt_learning import load_toml
376
+
377
+ # SDK-SIDE VALIDATION: Catch errors BEFORE sending to backend
378
+ from .validators import validate_prompt_learning_config
379
+
380
+ raw_config = load_toml(config_path)
381
+ validate_prompt_learning_config(raw_config, config_path)
382
+
383
+ try:
384
+ pl_cfg = PromptLearningConfig.from_path(config_path)
385
+ except ValidationError as exc:
386
+ raise click.ClickException(_format_validation_error(config_path, exc)) from exc
387
+
388
+ # Source of truth: TOML only (ignore shell/env and CLI overrides)
389
+ final_task_url = (pl_cfg.task_app_url or "").strip()
390
+
391
+ if not final_task_url:
392
+ raise click.ClickException(
393
+ "Task app URL required (provide --task-url or set prompt_learning.task_app_url in TOML)"
394
+ )
395
+
396
+ # Get task_app_api_key from config or environment
397
+ task_app_api_key = (
398
+ pl_cfg.task_app_api_key
399
+ or os.environ.get("ENVIRONMENT_API_KEY", "")
400
+ ).strip()
401
+
402
+ if not task_app_api_key:
403
+ raise click.ClickException(
404
+ "Task app API key required (set prompt_learning.task_app_api_key in TOML or ENVIRONMENT_API_KEY env var)"
405
+ )
406
+
407
+ # Build config dict for backend
408
+ config_dict = pl_cfg.to_dict()
409
+
410
+ # Ensure task_app_url and task_app_api_key are set
411
+ pl_section = config_dict.get("prompt_learning", {})
412
+ if isinstance(pl_section, dict):
413
+ pl_section["task_app_url"] = final_task_url
414
+ pl_section["task_app_api_key"] = task_app_api_key
415
+ else:
416
+ config_dict["prompt_learning"] = {
417
+ "task_app_url": final_task_url,
418
+ "task_app_api_key": task_app_api_key,
419
+ }
420
+
421
+ # Build payload matching backend API format
422
+ payload: dict[str, Any] = {
423
+ "algorithm": pl_cfg.algorithm,
424
+ "config_body": config_dict,
425
+ "overrides": overrides.get("overrides", {}),
426
+ "metadata": overrides.get("metadata", {}),
427
+ "auto_start": overrides.get("auto_start", True),
428
+ }
429
+
430
+ backend = overrides.get("backend")
431
+ if backend:
432
+ metadata_default: dict[str, Any] = {}
433
+ metadata = cast(dict[str, Any], payload.setdefault("metadata", metadata_default))
434
+ metadata["backend_base_url"] = ensure_api_base(str(backend))
435
+
436
+ return PromptLearningBuildResult(payload=payload, task_url=final_task_url)
437
+
438
+
346
439
  __all__ = [
440
+ "PromptLearningBuildResult",
347
441
  "RLBuildResult",
348
442
  "SFTBuildResult",
443
+ "build_prompt_learning_payload",
349
444
  "build_rl_payload",
350
445
  "build_sft_payload",
351
446
  ]