synth-ai 0.2.13.dev2__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (293)
  1. examples/README.md +1 -0
  2. examples/multi_step/SFT_README.md +147 -0
  3. examples/multi_step/configs/README_verilog_rl.md +77 -0
  4. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  5. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  6. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  7. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  8. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -11
  9. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  10. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  11. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  12. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  13. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  14. examples/multi_step/convert_traces_to_sft.py +84 -0
  15. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  16. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  17. examples/multi_step/readme.md +48 -0
  18. examples/multi_step/run_sft_qwen30b.sh +45 -0
  19. examples/multi_step/verilog_rl_lora.md +218 -0
  20. examples/qwen_coder/configs/coder_lora_30b.toml +3 -2
  21. examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
  22. examples/qwen_coder/configs/coder_lora_small.toml +2 -1
  23. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  24. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  25. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  26. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  27. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  28. examples/qwen_vl/QUICKSTART.md +327 -0
  29. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  30. examples/qwen_vl/README.md +154 -0
  31. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  32. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  33. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  34. examples/qwen_vl/SETUP_COMPLETE.md +275 -0
  35. examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
  36. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  37. examples/qwen_vl/__init__.py +2 -0
  38. examples/qwen_vl/collect_data_via_cli.md +423 -0
  39. examples/qwen_vl/collect_vision_traces.py +368 -0
  40. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
  41. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
  42. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
  43. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  44. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
  45. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
  46. examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
  47. examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
  48. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  49. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  50. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  51. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  52. examples/qwen_vl/run_vision_comparison.sh +62 -0
  53. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  54. examples/qwen_vl/test_image_validation.py +201 -0
  55. examples/qwen_vl/test_sft_vision_data.py +110 -0
  56. examples/rl/README.md +1 -1
  57. examples/rl/configs/eval_base_qwen.toml +17 -0
  58. examples/rl/configs/eval_rl_qwen.toml +13 -0
  59. examples/rl/configs/rl_from_base_qwen.toml +37 -0
  60. examples/rl/configs/rl_from_base_qwen17.toml +76 -0
  61. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  62. examples/rl/run_eval.py +436 -0
  63. examples/rl/run_rl_and_save.py +111 -0
  64. examples/rl/task_app/README.md +22 -0
  65. examples/rl/task_app/math_single_step.py +990 -0
  66. examples/rl/task_app/math_task_app.py +111 -0
  67. examples/sft/README.md +5 -5
  68. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
  69. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
  70. examples/sft/evaluate.py +4 -4
  71. examples/sft/export_dataset.py +7 -4
  72. examples/sft/generate_traces.py +2 -0
  73. examples/swe/task_app/README.md +1 -1
  74. examples/swe/task_app/grpo_swe_mini.py +1 -1
  75. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  76. examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
  77. examples/swe/task_app/hosted/policy_routes.py +0 -2
  78. examples/swe/task_app/hosted/rollout.py +2 -8
  79. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  80. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  81. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  82. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  83. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  84. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  85. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  86. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  87. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  88. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  89. examples/task_apps/crafter/task_app/__init__.py +3 -0
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +309 -14
  91. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +75 -4
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +55 -3
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +114 -32
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +127 -27
  97. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +156 -0
  98. examples/task_apps/enron/__init__.py +1 -0
  99. examples/task_apps/enron/filter_sft.toml +5 -0
  100. examples/task_apps/enron/tests/__init__.py +2 -0
  101. examples/task_apps/enron/tests/integration/__init__.py +2 -0
  102. examples/task_apps/enron/tests/integration/test_enron_eval.py +2 -0
  103. examples/task_apps/enron/tests/unit/__init__.py +2 -0
  104. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  105. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  106. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  107. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  108. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +2 -0
  109. examples/task_apps/pokemon_red/task_app.py +199 -6
  110. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +2 -0
  111. examples/task_apps/sokoban/filter_sft.toml +5 -0
  112. examples/task_apps/sokoban/tests/__init__.py +2 -0
  113. examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
  114. examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
  115. examples/task_apps/verilog/eval_groq_qwen32b.toml +8 -4
  116. examples/task_apps/verilog/filter_sft.toml +5 -0
  117. examples/task_apps/verilog/task_app/grpo_verilog.py +258 -23
  118. examples/task_apps/verilog/tests/__init__.py +2 -0
  119. examples/task_apps/verilog/tests/integration/__init__.py +2 -0
  120. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +2 -0
  121. examples/task_apps/verilog/tests/unit/__init__.py +2 -0
  122. examples/vlm/README.md +3 -3
  123. examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
  124. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  125. examples/vlm/filter_image_rows.py +1 -1
  126. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  127. examples/warming_up_to_rl/_utils.py +92 -0
  128. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  129. examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
  130. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
  131. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  132. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  133. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  134. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  135. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  136. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  137. examples/warming_up_to_rl/groq_test.py +2 -0
  138. examples/warming_up_to_rl/readme.md +63 -132
  139. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  140. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  141. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  142. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  143. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  144. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  145. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  146. examples/warming_up_to_rl/task_app/README.md +42 -0
  147. examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
  148. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  152. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  153. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  154. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  155. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  156. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  157. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
  158. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  159. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  160. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  161. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  162. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  163. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  164. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  165. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
  166. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  167. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  168. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  169. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  170. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  171. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  172. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  173. synth_ai/__init__.py +44 -30
  174. synth_ai/_utils/__init__.py +47 -0
  175. synth_ai/_utils/base_url.py +10 -0
  176. synth_ai/_utils/http.py +10 -0
  177. synth_ai/_utils/prompts.py +10 -0
  178. synth_ai/_utils/task_app_state.py +12 -0
  179. synth_ai/_utils/user_config.py +10 -0
  180. synth_ai/api/models/supported.py +145 -7
  181. synth_ai/api/train/__init__.py +13 -1
  182. synth_ai/api/train/cli.py +30 -7
  183. synth_ai/api/train/config_finder.py +18 -11
  184. synth_ai/api/train/env_resolver.py +13 -10
  185. synth_ai/cli/__init__.py +66 -49
  186. synth_ai/cli/_modal_wrapper.py +9 -6
  187. synth_ai/cli/_typer_patch.py +0 -2
  188. synth_ai/cli/_validate_task_app.py +22 -4
  189. synth_ai/cli/legacy_root_backup.py +3 -1
  190. synth_ai/cli/lib/__init__.py +10 -0
  191. synth_ai/cli/lib/task_app_discovery.py +7 -0
  192. synth_ai/cli/lib/task_app_env.py +518 -0
  193. synth_ai/cli/recent.py +1 -0
  194. synth_ai/cli/setup.py +266 -0
  195. synth_ai/cli/task_app_deploy.py +16 -0
  196. synth_ai/cli/task_app_list.py +25 -0
  197. synth_ai/cli/task_app_modal_serve.py +16 -0
  198. synth_ai/cli/task_app_serve.py +18 -0
  199. synth_ai/cli/task_apps.py +392 -141
  200. synth_ai/cli/train.py +18 -0
  201. synth_ai/cli/tui.py +62 -0
  202. synth_ai/demos/__init__.py +10 -0
  203. synth_ai/demos/core/__init__.py +28 -1
  204. synth_ai/demos/crafter/__init__.py +1 -0
  205. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  206. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  207. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  208. synth_ai/demos/demo_registry.py +176 -0
  209. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  210. synth_ai/demos/math/__init__.py +1 -0
  211. synth_ai/demos/math/_common.py +16 -0
  212. synth_ai/demos/math/app.py +38 -0
  213. synth_ai/demos/math/config.toml +76 -0
  214. synth_ai/demos/math/deploy_modal.py +54 -0
  215. synth_ai/demos/math/modal_task_app.py +702 -0
  216. synth_ai/demos/math/task_app_entry.py +51 -0
  217. synth_ai/environments/environment/core.py +7 -1
  218. synth_ai/environments/examples/bandit/engine.py +0 -1
  219. synth_ai/environments/examples/bandit/environment.py +0 -1
  220. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  221. synth_ai/environments/examples/verilog/engine.py +76 -10
  222. synth_ai/environments/examples/wordle/environment.py +0 -1
  223. synth_ai/evals/base.py +16 -5
  224. synth_ai/evals/client.py +1 -1
  225. synth_ai/inference/client.py +1 -1
  226. synth_ai/learning/client.py +1 -1
  227. synth_ai/learning/health.py +1 -1
  228. synth_ai/learning/jobs.py +1 -1
  229. synth_ai/learning/rl/client.py +1 -1
  230. synth_ai/learning/rl/env_keys.py +1 -1
  231. synth_ai/learning/rl/secrets.py +1 -1
  232. synth_ai/learning/sft/client.py +1 -1
  233. synth_ai/learning/sft/data.py +407 -4
  234. synth_ai/learning/validators.py +4 -1
  235. synth_ai/task/__init__.py +11 -1
  236. synth_ai/task/apps/__init__.py +5 -2
  237. synth_ai/task/config.py +259 -0
  238. synth_ai/task/contracts.py +15 -2
  239. synth_ai/task/rubrics/__init__.py +4 -2
  240. synth_ai/task/rubrics/loaders.py +27 -4
  241. synth_ai/task/rubrics/scoring.py +3 -0
  242. synth_ai/task/rubrics.py +219 -0
  243. synth_ai/task/trace_correlation_helpers.py +328 -0
  244. synth_ai/task/tracing_utils.py +14 -3
  245. synth_ai/task/validators.py +145 -2
  246. synth_ai/tracing_v3/config.py +15 -13
  247. synth_ai/tracing_v3/constants.py +21 -0
  248. synth_ai/tracing_v3/db_config.py +3 -1
  249. synth_ai/tracing_v3/decorators.py +10 -7
  250. synth_ai/tracing_v3/session_tracer.py +10 -0
  251. synth_ai/tracing_v3/turso/daemon.py +2 -2
  252. synth_ai/tracing_v3/turso/native_manager.py +108 -77
  253. synth_ai/tracing_v3/utils.py +1 -1
  254. synth_ai/tui/__init__.py +5 -0
  255. synth_ai/tui/__main__.py +13 -0
  256. synth_ai/tui/cli/__init__.py +1 -0
  257. synth_ai/tui/cli/query_experiments.py +164 -0
  258. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  259. synth_ai/tui/dashboard.py +911 -0
  260. synth_ai/utils/__init__.py +101 -0
  261. synth_ai/utils/base_url.py +94 -0
  262. synth_ai/utils/cli.py +131 -0
  263. synth_ai/utils/env.py +287 -0
  264. synth_ai/utils/http.py +169 -0
  265. synth_ai/utils/modal.py +308 -0
  266. synth_ai/utils/process.py +212 -0
  267. synth_ai/utils/prompts.py +39 -0
  268. synth_ai/utils/sqld.py +122 -0
  269. synth_ai/utils/task_app_discovery.py +882 -0
  270. synth_ai/utils/task_app_env.py +186 -0
  271. synth_ai/utils/task_app_state.py +318 -0
  272. synth_ai/utils/user_config.py +137 -0
  273. synth_ai/v0/config/__init__.py +1 -5
  274. synth_ai/v0/config/base_url.py +1 -7
  275. synth_ai/v0/tracing/config.py +1 -1
  276. synth_ai/v0/tracing/decorators.py +1 -1
  277. synth_ai/v0/tracing/upload.py +1 -1
  278. synth_ai/v0/tracing_v1/config.py +1 -1
  279. synth_ai/v0/tracing_v1/decorators.py +1 -1
  280. synth_ai/v0/tracing_v1/upload.py +1 -1
  281. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
  282. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/RECORD +286 -135
  283. synth_ai/cli/man.py +0 -106
  284. synth_ai/compound/cais.py +0 -0
  285. synth_ai/core/experiment.py +0 -13
  286. synth_ai/core/system.py +0 -15
  287. synth_ai/demo_registry.py +0 -295
  288. synth_ai/handshake.py +0 -109
  289. synth_ai/http.py +0 -26
  290. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
  291. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
  292. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
  293. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
@@ -149,7 +149,11 @@ class OpenAIClient:
149
149
  OpenAI-compatible chat completion response
150
150
  """
151
151
  base = (base_url or self.base_url).rstrip("/")
152
- url = base + "/v1/chat/completions"
152
+ # Don't append /v1/chat/completions if the URL already contains it
153
+ if "/v1/chat/completions" in base:
154
+ url = base
155
+ else:
156
+ url = base + "/v1/chat/completions"
153
157
  timeout = timeout_s or self.timeout_s
154
158
 
155
159
  # Merge headers
@@ -164,10 +168,28 @@ class OpenAIClient:
164
168
  except Exception:
165
169
  pass
166
170
 
167
- # If target is our in-app Groq proxy, force Authorization to use GROQ_API_KEY
171
+ # Set Authorization header based on the target URL
168
172
  try:
169
173
  low_url = (url or "").lower()
170
- if "/proxy/groq" in low_url or "groq" in low_url:
174
+
175
+ # If calling OpenAI directly (api.openai.com)
176
+ if "api.openai.com" in low_url:
177
+ openai_key = os.getenv("OPENAI_API_KEY")
178
+ if openai_key and isinstance(openai_key, str):
179
+ headers["Authorization"] = f"Bearer {openai_key}"
180
+
181
+ # If target is Synth backend (any deployment), use SYNTH_API_KEY
182
+ # Matches: synth-backend-*, agent-learning*, localhost:8000, 127.0.0.1:8000
183
+ elif any(pattern in low_url for pattern in [
184
+ "synth-backend", "synth.run", "agent-learning",
185
+ "localhost:8000", "127.0.0.1:8000"
186
+ ]):
187
+ synth_key = os.getenv("SYNTH_API_KEY")
188
+ if synth_key and isinstance(synth_key, str):
189
+ headers["Authorization"] = f"Bearer {synth_key}"
190
+
191
+ # If target is Groq, use GROQ_API_KEY
192
+ elif "/proxy/groq" in low_url or "api.groq.com" in low_url:
171
193
  gk = os.getenv("GROQ_API_KEY")
172
194
  if gk and isinstance(gk, str):
173
195
  headers["Authorization"] = f"Bearer {gk}"
@@ -196,8 +218,20 @@ class OpenAIClient:
196
218
  # Do NOT fall back silently; surface the error so callers fail fast
197
219
  raise
198
220
 
221
+ # DEBUG: Log request BEFORE _fix_model_parameters
222
+ logger.debug(f"🔊 [OPENAI_CLIENT_PRE_FIX] Request message[1] content type: {type(request.get('messages', [])[1].get('content') if len(request.get('messages', [])) > 1 else None)}")
223
+ if len(request.get("messages", [])) > 1:
224
+ msg1_content = request["messages"][1].get("content")
225
+ logger.debug(f"🔊 [OPENAI_CLIENT_PRE_FIX] Message[1] content value: {msg1_content if not isinstance(msg1_content, list) else f'list[{len(msg1_content)}]'}")
226
+
199
227
  # Fix parameter compatibility for newer models
200
228
  processed_request = self._fix_model_parameters(request, target_url=url)
229
+
230
+ # DEBUG: Log request AFTER _fix_model_parameters
231
+ logger.debug(f"🔊 [OPENAI_CLIENT_POST_FIX] Processed message[1] content type: {type(processed_request.get('messages', [])[1].get('content') if len(processed_request.get('messages', [])) > 1 else None)}")
232
+ if len(processed_request.get("messages", [])) > 1:
233
+ msg1_content_post = processed_request["messages"][1].get("content")
234
+ logger.debug(f"🔊 [OPENAI_CLIENT_POST_FIX] Message[1] content value: {msg1_content_post if not isinstance(msg1_content_post, list) else f'list[{len(msg1_content_post)}]'}")
201
235
 
202
236
  # Log request (redact messages in production)
203
237
  logger.info(f"Inference POST target: {url}")
@@ -206,6 +240,24 @@ class OpenAIClient:
206
240
  with contextlib.suppress(Exception):
207
241
  keys_preview = sorted(processed_request.keys())
208
242
  logger.info(f"Request keys: {keys_preview}")
243
+ # DEBUG: Log message structure for vision debugging
244
+ if "messages" in processed_request:
245
+ msgs = processed_request["messages"]
246
+ if isinstance(msgs, list):
247
+ logger.debug(f"🔊 [OPENAI_CLIENT] Request has {len(msgs)} messages")
248
+ for idx, msg in enumerate(msgs):
249
+ if isinstance(msg, dict):
250
+ role = msg.get("role")
251
+ content = msg.get("content")
252
+ if isinstance(content, list):
253
+ logger.debug(f"🔊 [OPENAI_CLIENT] Message[{idx}] role={role}, content=list[{len(content)}]")
254
+ for part_idx, part in enumerate(content):
255
+ if isinstance(part, dict):
256
+ part_type = part.get("type")
257
+ logger.debug(f"🔊 [OPENAI_CLIENT] Part[{part_idx}]: type={part_type}")
258
+ else:
259
+ content_len = len(str(content)) if content else 0
260
+ logger.debug(f"🔊 [OPENAI_CLIENT] Message[{idx}] role={role}, content_type={type(content).__name__}, len={content_len}")
209
261
 
210
262
  # Final hard-guard for OpenAI: ensure unsupported field is not present
211
263
  try:
@@ -10,11 +10,13 @@ from fastapi import APIRouter, HTTPException, Request
10
10
  from pydantic import BaseModel
11
11
 
12
12
  from synth_ai.task.auth import allowed_environment_api_keys, normalize_environment_api_key
13
+ from synth_ai.task.contracts import RolloutMode
13
14
 
14
15
  from .envs.crafter.policy import CrafterPolicy
15
16
  from .inference.openai_client import create_inference_client
16
17
  from .registry import registry
17
18
  from .storage.volume import storage
19
+ from .utils import ensure_chat_completions_url
18
20
 
19
21
  # Token budgeting (shared logic with inference server)
20
22
  try:
@@ -40,6 +42,7 @@ class PolicyCreateRequest(BaseModel):
40
42
  parent_policy_id: str | None = None
41
43
  rl_run_id: str
42
44
  bound_env_id: str | None = None
45
+ mode: RolloutMode
43
46
 
44
47
 
45
48
  class PolicyCreateResponse(BaseModel):
@@ -119,6 +122,14 @@ async def create_policy(
119
122
  config.setdefault("inference_url", f"{base_url}/proxy")
120
123
  config["provider"] = "openai"
121
124
 
125
+ received_url = config.get("inference_url")
126
+ logger.info(
127
+ "POLICY_CREATE: policy=%s provider=%s raw_inference_url=%s",
128
+ request.policy_name,
129
+ provider,
130
+ received_url,
131
+ )
132
+
122
133
  if "inference_url" not in config and task_app is not None:
123
134
  task_base_url = getattr(task_app, "vllm_base_url", None)
124
135
  if task_base_url:
@@ -133,6 +144,31 @@ async def create_policy(
133
144
  detail="Policy configuration must include 'inference_url' and 'model'.",
134
145
  )
135
146
 
147
+ # Get mode from PolicyCreateRequest (defaults to "rl" for backward compatibility)
148
+ mode = request.mode
149
+ logger.info("POLICY_CREATE: Using mode=%s for URL processing", mode)
150
+
151
+ sanitized_url = ensure_chat_completions_url(config.get("inference_url"), mode=mode)
152
+ if isinstance(sanitized_url, str) and sanitized_url:
153
+ if sanitized_url != config.get("inference_url"):
154
+ logger.warning(
155
+ "POLICY_CREATE: normalized inference_url for policy=%s provider=%s mode=%s from %s to %s",
156
+ request.policy_name,
157
+ provider,
158
+ mode,
159
+ config.get("inference_url"),
160
+ sanitized_url,
161
+ )
162
+ config["inference_url"] = sanitized_url
163
+ else:
164
+ logger.warning(
165
+ "POLICY_CREATE: unable to normalize inference_url for policy=%s provider=%s mode=%s raw=%s",
166
+ request.policy_name,
167
+ mode,
168
+ provider,
169
+ config.get("inference_url"),
170
+ )
171
+
136
172
  # Create policy instance based on name
137
173
  pname = request.policy_name.lower()
138
174
  if pname in ["crafter-react", "crafter"]:
@@ -381,8 +417,6 @@ async def step_policy(
381
417
  inf_req = meta["inference_request"]
382
418
  msgs = inf_req["messages"]
383
419
  model_name = inf_req.get("model") or getattr(policy, "model", None) or ""
384
- system_messages: list[str] = []
385
- user_messages: list[str] = []
386
420
  if msgs and len(msgs) > 0 and msgs[0]["role"] == "system":
387
421
  sys_text = msgs[0]["content"]
388
422
  policy_name = getattr(policy, "name", "") or type(policy).__name__.lower()
@@ -507,7 +541,22 @@ async def step_policy(
507
541
 
508
542
  # Ensure meta carries the final target URL for downstream logging/clients
509
543
  with contextlib.suppress(Exception):
510
- meta["inference_url"] = target_url
544
+ sanitized_target = ensure_chat_completions_url(target_url)
545
+ if sanitized_target and sanitized_target != target_url:
546
+ logger.warning(
547
+ "POLICY_STEP: normalized inference_url mid-flight policy=%s from %s to %s",
548
+ policy_name,
549
+ target_url,
550
+ sanitized_target,
551
+ )
552
+ elif not sanitized_target:
553
+ logger.info(
554
+ "POLICY_STEP: inference_url unchanged policy=%s target=%s",
555
+ policy_name,
556
+ target_url,
557
+ )
558
+ meta["inference_url"] = sanitized_target if sanitized_target else target_url
559
+ target_url = sanitized_target or target_url
511
560
 
512
561
  # Select API key based on resolved target URL
513
562
  api_key_override = None
@@ -850,38 +899,71 @@ async def step_policy(
850
899
  req_body["temperature"] = 0.1
851
900
  meta["inference_request"] = req_body
852
901
 
853
- # Strip image parts: Crafter policy currently only uses text prompts.
854
- # Some providers reject image_url payloads entirely, so always flatten to plain text.
855
- req_body2 = meta.get("inference_request", {})
856
- if isinstance(req_body2, dict):
857
- msgs = req_body2.get("messages")
858
- if isinstance(msgs, list):
859
- new_msgs = []
860
- changed = False
861
- for m in msgs:
862
- try:
863
- if isinstance(m, dict):
864
- content = m.get("content")
865
- if isinstance(content, list):
866
- parts: list[str] = []
867
- for seg in content:
868
- if isinstance(seg, dict):
869
- txt = seg.get("text") or seg.get("content")
870
- if isinstance(txt, str) and txt:
871
- parts.append(txt)
872
- m2 = dict(m)
873
- m2["content"] = "\n".join(parts)
874
- new_msgs.append(m2)
875
- changed = True
902
+ # Message flattening: Convert multimodal content to text-only for non-vision models.
903
+ # SKIP message flattening for vision models to preserve image_url parts!
904
+ # The old code here was flattening multimodal content (list) to text-only (str),
905
+ # which strips out image_url parts. This breaks vision models.
906
+ # Only flatten for non-vision models that can't handle multimodal format.
907
+ is_vision_model = False
908
+ try:
909
+ # Check if the policy is a vision-capable policy
910
+ if isinstance(policy, CrafterPolicy):
911
+ is_vision_model = getattr(policy, "use_vision", False)
912
+ except Exception:
913
+ pass
914
+
915
+ logger.debug(f"🔊 [POLICY_ROUTES] is_vision_model={is_vision_model}, will_flatten={not is_vision_model}")
916
+
917
+ if not is_vision_model:
918
+ # Only flatten for non-vision models (backward compatibility)
919
+ req_body2 = meta.get("inference_request", {})
920
+ if isinstance(req_body2, dict):
921
+ msgs = req_body2.get("messages")
922
+ if isinstance(msgs, list):
923
+ new_msgs = []
924
+ changed = False
925
+ for m in msgs:
926
+ try:
927
+ if isinstance(m, dict):
928
+ content = m.get("content")
929
+ if isinstance(content, list):
930
+ parts: list[str] = []
931
+ for seg in content:
932
+ if isinstance(seg, dict):
933
+ txt = seg.get("text") or seg.get("content")
934
+ if isinstance(txt, str) and txt:
935
+ parts.append(txt)
936
+ m2 = dict(m)
937
+ m2["content"] = "\n".join(parts)
938
+ new_msgs.append(m2)
939
+ changed = True
940
+ else:
941
+ new_msgs.append(m)
876
942
  else:
877
943
  new_msgs.append(m)
878
- else:
944
+ except Exception:
879
945
  new_msgs.append(m)
880
- except Exception:
881
- new_msgs.append(m)
882
- if changed:
883
- req_body2["messages"] = new_msgs
884
- meta["inference_request"] = req_body2
946
+ if changed:
947
+ req_body2["messages"] = new_msgs
948
+ meta["inference_request"] = req_body2
949
+ logger.debug(f"🔊 [POLICY_ROUTES] Flattened messages for non-vision model")
950
+ else:
951
+ logger.debug(f"🔊 [POLICY_ROUTES] Preserving multimodal content for vision model")
952
+
953
+ # DEBUG: Log final message structure before calling inference
954
+ final_req = meta.get("inference_request", {})
955
+ if isinstance(final_req, dict):
956
+ final_msgs = final_req.get("messages", [])
957
+ logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Sending {len(final_msgs)} messages to inference")
958
+ for idx, msg in enumerate(final_msgs):
959
+ if isinstance(msg, dict):
960
+ content = msg.get("content")
961
+ logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Message[{idx}]: type={type(content).__name__}, is_list={isinstance(content, list)}")
962
+ if isinstance(content, list):
963
+ logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Content list has {len(content)} items")
964
+ for part_idx, part in enumerate(content[:3]): # Show first 3 items
965
+ if isinstance(part, dict):
966
+ logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Part[{part_idx}]: type={part.get('type')}")
885
967
 
886
968
  _t_start = _t.time()
887
969
  call_started_at = datetime.utcnow()
@@ -13,6 +13,7 @@ from pydantic import BaseModel, Field
13
13
  from synth_ai.lm.vendors.base import BaseLMResponse
14
14
  from synth_ai.task.tracing_utils import unique_sft_path
15
15
  from synth_ai.tracing_v3.abstractions import EnvironmentEvent, LMCAISEvent, TimeRecord
16
+ from synth_ai.task.contracts import RolloutMode
16
17
  from synth_ai.tracing_v3.llm_call_record_helpers import create_llm_call_record_from_response
17
18
  from synth_ai.tracing_v3.session_tracer import SessionTracer
18
19
 
@@ -120,6 +121,8 @@ class RolloutRequest(BaseModel):
120
121
  # Optional run/session context
121
122
  training_session_id: str | None = None
122
123
  synth_base_url: str | None = None
124
+ # Mode controls URL transformation: REQUIRED to make intent explicit
125
+ mode: RolloutMode
123
126
 
124
127
 
125
128
  class RolloutStep(BaseModel):
@@ -140,6 +143,7 @@ class RolloutTrajectory(BaseModel):
140
143
  final: dict[str, Any] | None = None
141
144
  length: int
142
145
  decision_samples: list[dict[str, Any]] | None = None
146
+ inference_url: str | None = None
143
147
 
144
148
 
145
149
  def _normalize_step_strategy(raw_strategy: Any) -> str:
@@ -452,11 +456,12 @@ class RolloutMetrics(BaseModel):
452
456
  class RolloutResponse(BaseModel):
453
457
  run_id: str
454
458
  trajectories: list[RolloutTrajectory]
455
- branches: dict[str, list[str]] = {}
459
+ branches: dict[str, list[str]] = Field(default_factory=dict)
456
460
  metrics: RolloutMetrics
457
461
  aborted: bool = False
458
462
  ops_executed: int = 0
459
463
  trace: dict[str, Any] | None = None
464
+ pipeline_metadata: dict[str, Any] = Field(default_factory=dict)
460
465
 
461
466
 
462
467
  class RolloutTracingContext:
@@ -567,7 +572,7 @@ class RolloutTracingContext:
567
572
  try:
568
573
  await self.tracer.record_message(
569
574
  content=self._prompt_payload(entry, role="system"),
570
- message_type="policy_system_prompt",
575
+ message_type="system", # Use standard message type
571
576
  metadata=self._message_metadata(),
572
577
  )
573
578
  except Exception as exc:
@@ -576,11 +581,16 @@ class RolloutTracingContext:
576
581
  try:
577
582
  await self.tracer.record_message(
578
583
  content=self._prompt_payload(entry, role="user"),
579
- message_type="policy_user_prompt",
584
+ message_type="user", # Use standard message type
580
585
  metadata=self._message_metadata(),
581
586
  )
582
587
  except Exception as exc:
583
588
  logger.debug("TRACING_USER_MSG_FAIL: %s", exc)
589
+
590
+ # Debug: Check message count
591
+ if self.tracer and self.tracer._current_trace:
592
+ msg_count = len(self.tracer._current_trace.markov_blanket_message_history)
593
+ logger.info(f"[TRACE_DEBUG] After record_policy_prompts: {msg_count} messages in trace")
584
594
 
585
595
  def _content_to_text(self, content: Any) -> str:
586
596
  if isinstance(content, str):
@@ -656,8 +666,8 @@ class RolloutTracingContext:
656
666
  try:
657
667
  await self.tracer.record_message(
658
668
  content=self._safe_json(tool_calls),
659
- message_type="policy_tool_call",
660
- metadata=self._message_metadata(),
669
+ message_type="assistant", # Map to standard assistant message type
670
+ metadata={**self._message_metadata(), "is_tool_call": True},
661
671
  )
662
672
  except Exception as exc:
663
673
  logger.debug("TRACING_TOOL_MSG_FAIL: %s", exc)
@@ -928,11 +938,22 @@ class RolloutTracingContext:
928
938
  except Exception as exc:
929
939
  logger.debug("TRACING_OUTCOME_FAIL: %s", exc)
930
940
  try:
941
+ # Debug: Check message count before end_session
942
+ if self.tracer._current_trace:
943
+ msg_count = len(self.tracer._current_trace.markov_blanket_message_history)
944
+ logger.info(f"[TRACE_DEBUG] Before end_session: {msg_count} messages in trace")
945
+
931
946
  self.session_trace = await self.tracer.end_session()
932
- if self.session_trace is not None:
947
+
948
+ # Debug: Check if session was saved
949
+ if self.session_trace:
950
+ logger.info(f"[TRACE_DEBUG] Session ended successfully, session_id={self.session_trace.session_id}")
933
951
  self.session_trace.metadata.update(self.metadata_updates)
952
+ logger.info(f"[TRACE_DEBUG] session_trace.metadata keys: {list(self.session_trace.metadata.keys())}")
953
+ else:
954
+ logger.warning("[TRACE_DEBUG] end_session returned None!")
934
955
  except Exception as exc:
935
- logger.debug("TRACING_END_SESSION_FAIL: %s", exc)
956
+ logger.warning(f"TRACING_END_SESSION_FAIL: {exc}", exc_info=True)
936
957
  self.session_trace = None
937
958
  with contextlib.suppress(Exception):
938
959
  await self.tracer.close()
@@ -964,10 +985,15 @@ class RolloutTracingContext:
964
985
  def build_trace_payload(self, session_trace: Any) -> dict[str, Any] | None:
965
986
  if not self.return_trace or session_trace is None:
966
987
  return None
967
- if self.trace_format == "full":
988
+
989
+ # For both "full" and "structured" formats, return the complete session trace
990
+ # The CLI (synth-ai eval) expects this for proper trace storage
991
+ if self.trace_format in ("full", "structured"):
968
992
  payload = session_trace.to_dict()
969
993
  payload.setdefault("metadata", {}).update(self.metadata_updates)
970
994
  return payload
995
+
996
+ # For "compact" format, return only summary stats
971
997
  metadata = dict(session_trace.metadata)
972
998
  metadata.update(self.metadata_updates)
973
999
  return {
@@ -1056,12 +1082,14 @@ async def execute_rollout(
1056
1082
  req: Request,
1057
1083
  ) -> RolloutResponse:
1058
1084
  """Execute a rollout with coordinated environment and policy steps."""
1085
+ logger.info("ROLLOUT: mode = %s", request.mode)
1086
+
1059
1087
  # Emit rollout identifier early for correlation
1060
1088
  with contextlib.suppress(Exception):
1061
1089
  _rid = getattr(request, "run_id", None)
1062
1090
  _pol = getattr(request.policy, "policy_name", None) or getattr(request.policy, "policy_id", None)
1063
1091
  _env = getattr(request.env, "env_name", None) or getattr(request.env, "env_id", None)
1064
- logger.info("ROLLOUT_BEGIN: run_id=%s policy=%s env=%s", _rid, _pol, _env)
1092
+ logger.info("ROLLOUT_BEGIN: run_id=%s policy=%s env=%s mode=%s", _rid, _pol, _env, request.mode)
1065
1093
  print(f"[rollout] begin run_id={_rid} policy={_pol} env={_env}", flush=True)
1066
1094
  # Enforce per-episode step cap via env-specific parameters; default to 20 if omitted
1067
1095
  try:
@@ -1150,14 +1178,6 @@ async def execute_rollout(
1150
1178
  logger.debug(f"TRACER_FACTORY_FAIL: {exc}")
1151
1179
  tracing_context = RolloutTracingContext(tracer_instance, request, req)
1152
1180
  await tracing_context.start_session()
1153
- # Print whether tracing is active for this rollout
1154
- try:
1155
- print(
1156
- f"[rollout] tracing enabled={bool(tracing_context.enabled)} run_id={request.run_id}",
1157
- flush=True,
1158
- )
1159
- except Exception:
1160
- pass
1161
1181
 
1162
1182
  # Register run
1163
1183
  registry.register_run(request.run_id)
@@ -1271,6 +1291,7 @@ async def execute_rollout(
1271
1291
  config=_policy_config,
1272
1292
  rl_run_id=request.run_id,
1273
1293
  bound_env_id=env_id,
1294
+ mode=request.mode, # Pass through mode for URL transformation control
1274
1295
  ),
1275
1296
  req,
1276
1297
  )
@@ -1601,16 +1622,21 @@ async def execute_rollout(
1601
1622
 
1602
1623
  elif op == "env":
1603
1624
  if not pending_tool_calls:
1625
+ # Instead of failing, inject a no-op action to keep the rollout going
1604
1626
  with contextlib.suppress(Exception):
1605
1627
  logger.warning(
1606
- "POLICY_STEP_FAIL: missing tool_calls; failing rollout run_id=%s op_idx=%s",
1628
+ "POLICY_STEP_NOOP: missing tool_calls; injecting noop action run_id=%s op_idx=%s",
1607
1629
  request.run_id,
1608
1630
  str(op_idx),
1609
1631
  )
1610
- raise HTTPException(
1611
- status_code=500,
1612
- detail="policy_step_failed: missing tool_calls (no_tool_calls)",
1613
- )
1632
+ # Create a noop tool call in the format expected by the environment
1633
+ pending_tool_calls = [
1634
+ {
1635
+ "id": f"noop_{op_idx}",
1636
+ "tool": "interact",
1637
+ "arguments": {"action": "noop"},
1638
+ }
1639
+ ]
1614
1640
 
1615
1641
  # Environment step
1616
1642
  from .environment_routes import EnvStepRequest, step_environment
@@ -1843,14 +1869,73 @@ async def execute_rollout(
1843
1869
  timing_final.setdefault("overhead_ms", 0.0)
1844
1870
 
1845
1871
  # Build trajectory
1846
- # Extract inference_url from policy meta
1872
+ # Extract inference_url from policy config (REQUIRED for trace correlation)
1873
+ # The trainer sets this in policy config with ?cid=... parameter
1847
1874
  inference_url = None
1848
- if policy_handle is not None:
1875
+
1876
+ # Try policy config from request first (most reliable source)
1877
+ try:
1878
+ policy_config_snapshot = (
1879
+ request.policy.config if isinstance(request.policy.config, dict) else {}
1880
+ )
1881
+ inference_url = policy_config_snapshot.get("inference_url")
1882
+ if inference_url:
1883
+ logger.info(
1884
+ "ROLLOUT_TRAJECTORY: extracted inference_url from request.policy.config run_id=%s url=%s",
1885
+ request.run_id,
1886
+ inference_url,
1887
+ )
1888
+ except Exception as exc:
1889
+ logger.warning(
1890
+ "ROLLOUT_TRAJECTORY: failed to get inference_url from request.policy.config run_id=%s: %s",
1891
+ request.run_id,
1892
+ exc,
1893
+ )
1894
+
1895
+ # Fallback: Try policy handle snapshot (if request.policy.config failed)
1896
+ if not inference_url and policy_handle is not None:
1849
1897
  try:
1850
1898
  policy_snapshot = policy_handle.snapshot()
1851
1899
  inference_url = policy_snapshot.get("config", {}).get("inference_url")
1852
- except Exception:
1853
- pass
1900
+ if inference_url:
1901
+ logger.info(
1902
+ "ROLLOUT_TRAJECTORY: extracted inference_url from policy_handle.snapshot run_id=%s url=%s",
1903
+ request.run_id,
1904
+ inference_url,
1905
+ )
1906
+ except Exception as exc:
1907
+ logger.warning(
1908
+ "ROLLOUT_TRAJECTORY: failed to snapshot policy for run_id=%s policy_id=%s: %s",
1909
+ request.run_id,
1910
+ policy_id,
1911
+ exc,
1912
+ )
1913
+
1914
+ # ASSERTION: inference_url MUST be present (required by RolloutTrajectory schema)
1915
+ if not inference_url:
1916
+ raise ValueError(
1917
+ f"FATAL: inference_url is required but not found!\n"
1918
+ f"\n"
1919
+ f"run_id: {request.run_id}\n"
1920
+ f"policy_id: {policy_id}\n"
1921
+ f"policy_config_keys: {list(policy_config_snapshot.keys()) if 'policy_config_snapshot' in locals() else 'N/A'}\n"
1922
+ f"\n"
1923
+ f"The trainer MUST set inference_url in policy config with ?cid=... parameter.\n"
1924
+ f"This is required for trace correlation and hydration.\n"
1925
+ )
1926
+
1927
+ # policy_config_snapshot already set above in try block (line 1876-1878)
1928
+ # Ensure it exists for logging below
1929
+ if 'policy_config_snapshot' not in locals():
1930
+ policy_config_snapshot = {}
1931
+
1932
+ logger.info(
1933
+ "ROLLOUT_TRAJECTORY: run_id=%s policy_id=%s inference_url=%s trace_id=%s",
1934
+ request.run_id,
1935
+ policy_id,
1936
+ inference_url,
1937
+ policy_config_snapshot.get("trace_correlation_id"),
1938
+ )
1854
1939
 
1855
1940
  trajectory = RolloutTrajectory(
1856
1941
  env_id=env_id,
@@ -1948,12 +2033,17 @@ async def execute_rollout(
1948
2033
  )
1949
2034
  finalized = True
1950
2035
  trace_payload = tracing_context.build_trace_payload(session_trace)
2036
+
2037
+ # Debug: Check trace payload
2038
+ logger.info(f"[TRACE_DEBUG] trace_payload is None: {trace_payload is None}, return_trace={tracing_context.return_trace}")
2039
+ if trace_payload:
2040
+ logger.info(f"[TRACE_DEBUG] trace_payload keys: {list(trace_payload.keys())}")
1951
2041
 
1952
2042
  # Hard-fail if no steps executed (avg_turns == 0 scenario)
1953
2043
  if metrics.num_steps <= 0:
1954
2044
  raise HTTPException(status_code=500, detail="no_steps_executed: avg_turns == 0")
1955
2045
 
1956
- return RolloutResponse(
2046
+ response = RolloutResponse(
1957
2047
  run_id=request.run_id,
1958
2048
  trajectories=[trajectory],
1959
2049
  branches={},
@@ -1962,6 +2052,16 @@ async def execute_rollout(
1962
2052
  ops_executed=ops_executed,
1963
2053
  trace=trace_payload,
1964
2054
  )
2055
+ logger.info(
2056
+ "ROLLOUT_RESPONSE: run_id=%s aborted=%s ops_executed=%s metrics_steps=%s trace_present=%s pipeline_metadata=%s",
2057
+ request.run_id,
2058
+ aborted,
2059
+ ops_executed,
2060
+ metrics.num_steps,
2061
+ bool(trace_payload),
2062
+ response.pipeline_metadata,
2063
+ )
2064
+ return response
1965
2065
 
1966
2066
  except Exception as e:
1967
2067
  logger.error(f"Rollout failed for run {request.run_id}: {e}")