synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (354) hide show
  1. examples/README.md +1 -0
  2. examples/analyze_semantic_words.sh +2 -2
  3. examples/blog_posts/pokemon_vl/README.md +98 -0
  4. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
  5. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  6. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  7. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
  8. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  9. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  10. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  11. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  12. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  13. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  14. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
  15. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  16. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  17. examples/multi_step/SFT_README.md +147 -0
  18. examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
  19. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
  20. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
  21. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
  22. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  23. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  24. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  25. examples/multi_step/convert_traces_to_sft.py +84 -0
  26. examples/multi_step/run_sft_qwen30b.sh +45 -0
  27. examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
  28. examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
  29. examples/qwen_coder/configs/coder_lora_small.toml +1 -2
  30. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  31. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  32. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  33. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  34. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  35. examples/qwen_vl/QUICKSTART.md +327 -0
  36. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  37. examples/qwen_vl/README.md +152 -0
  38. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  39. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  40. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  41. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  42. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  43. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  44. examples/qwen_vl/__init__.py +2 -0
  45. examples/qwen_vl/collect_data_via_cli.md +415 -0
  46. examples/qwen_vl/collect_vision_traces.py +368 -0
  47. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  48. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  49. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  50. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  51. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  52. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  53. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  54. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  55. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  56. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  57. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  58. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  59. examples/qwen_vl/run_vision_comparison.sh +61 -0
  60. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  61. examples/qwen_vl/test_image_validation.py +201 -0
  62. examples/qwen_vl/test_sft_vision_data.py +110 -0
  63. examples/rl/README.md +6 -6
  64. examples/rl/configs/eval_base_qwen.toml +17 -0
  65. examples/rl/configs/eval_rl_qwen.toml +13 -0
  66. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  67. examples/rl/configs/rl_from_base_qwen17.toml +79 -0
  68. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  69. examples/rl/run_eval.py +436 -0
  70. examples/rl/run_rl_and_save.py +111 -0
  71. examples/rl/task_app/README.md +21 -0
  72. examples/rl/task_app/math_single_step.py +990 -0
  73. examples/rl/task_app/math_task_app.py +111 -0
  74. examples/run_crafter_demo.sh +2 -2
  75. examples/sft/README.md +6 -6
  76. examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
  77. examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
  78. examples/sft/evaluate.py +2 -4
  79. examples/sft/export_dataset.py +7 -4
  80. examples/swe/task_app/README.md +33 -3
  81. examples/swe/task_app/grpo_swe_mini.py +4 -1
  82. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  83. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  84. examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
  85. examples/swe/task_app/hosted/inference/openai_client.py +4 -4
  86. examples/swe/task_app/hosted/policy_routes.py +0 -2
  87. examples/swe/task_app/hosted/rollout.py +0 -8
  88. examples/swe/task_app/morph_backend.py +178 -0
  89. examples/task_apps/crafter/task_app/README.md +1 -1
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
  91. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
  97. examples/task_apps/enron/__init__.py +1 -0
  98. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  99. examples/task_apps/math/README.md +1 -2
  100. examples/task_apps/pokemon_red/README.md +3 -4
  101. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  102. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  103. examples/task_apps/pokemon_red/task_app.py +36 -5
  104. examples/task_apps/sokoban/README.md +2 -3
  105. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  106. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  107. examples/vlm/README.md +3 -3
  108. examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
  109. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  110. examples/vlm/filter_image_rows.py +1 -1
  111. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  112. examples/warming_up_to_rl/_utils.py +92 -0
  113. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  114. examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
  115. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  116. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  117. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  118. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  119. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  120. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  121. examples/warming_up_to_rl/readme.md +63 -132
  122. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  123. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  124. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  125. examples/warming_up_to_rl/task_app/README.md +42 -0
  126. examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
  127. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  128. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  129. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  130. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  131. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  132. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  133. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  134. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  135. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  136. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  137. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  138. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  139. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  147. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  148. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  152. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  153. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
  154. synth_ai/__init__.py +44 -30
  155. synth_ai/_utils/__init__.py +47 -0
  156. synth_ai/_utils/base_url.py +10 -0
  157. synth_ai/_utils/http.py +10 -0
  158. synth_ai/_utils/prompts.py +10 -0
  159. synth_ai/_utils/task_app_state.py +12 -0
  160. synth_ai/_utils/user_config.py +10 -0
  161. synth_ai/api/models/supported.py +144 -7
  162. synth_ai/api/train/__init__.py +13 -1
  163. synth_ai/api/train/builders.py +9 -3
  164. synth_ai/api/train/cli.py +155 -17
  165. synth_ai/api/train/config_finder.py +18 -11
  166. synth_ai/api/train/configs/__init__.py +8 -1
  167. synth_ai/api/train/configs/rl.py +32 -7
  168. synth_ai/api/train/configs/sft.py +6 -2
  169. synth_ai/api/train/configs/shared.py +59 -2
  170. synth_ai/api/train/env_resolver.py +13 -10
  171. synth_ai/auth/credentials.py +119 -0
  172. synth_ai/cli/__init__.py +61 -69
  173. synth_ai/cli/_modal_wrapper.py +7 -5
  174. synth_ai/cli/_typer_patch.py +0 -2
  175. synth_ai/cli/_validate_task_app.py +22 -4
  176. synth_ai/cli/commands/__init__.py +17 -0
  177. synth_ai/cli/commands/demo/__init__.py +6 -0
  178. synth_ai/cli/commands/demo/core.py +163 -0
  179. synth_ai/cli/commands/deploy/__init__.py +23 -0
  180. synth_ai/cli/commands/deploy/core.py +614 -0
  181. synth_ai/cli/commands/deploy/errors.py +72 -0
  182. synth_ai/cli/commands/deploy/validation.py +11 -0
  183. synth_ai/cli/commands/eval/__init__.py +19 -0
  184. synth_ai/cli/commands/eval/core.py +1109 -0
  185. synth_ai/cli/commands/eval/errors.py +81 -0
  186. synth_ai/cli/commands/eval/validation.py +133 -0
  187. synth_ai/cli/commands/filter/__init__.py +12 -0
  188. synth_ai/cli/commands/filter/core.py +388 -0
  189. synth_ai/cli/commands/filter/errors.py +55 -0
  190. synth_ai/cli/commands/filter/validation.py +77 -0
  191. synth_ai/cli/commands/help/__init__.py +177 -0
  192. synth_ai/cli/commands/help/core.py +73 -0
  193. synth_ai/cli/commands/status/__init__.py +64 -0
  194. synth_ai/cli/commands/status/client.py +192 -0
  195. synth_ai/cli/commands/status/config.py +92 -0
  196. synth_ai/cli/commands/status/errors.py +20 -0
  197. synth_ai/cli/commands/status/formatters.py +164 -0
  198. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  199. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  200. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  201. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  202. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  203. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  204. synth_ai/cli/commands/status/utils.py +114 -0
  205. synth_ai/cli/commands/train/__init__.py +53 -0
  206. synth_ai/cli/commands/train/core.py +21 -0
  207. synth_ai/cli/commands/train/errors.py +117 -0
  208. synth_ai/cli/commands/train/judge_schemas.py +199 -0
  209. synth_ai/cli/commands/train/judge_validation.py +304 -0
  210. synth_ai/cli/commands/train/validation.py +443 -0
  211. synth_ai/cli/demo.py +2 -162
  212. synth_ai/cli/deploy/__init__.py +28 -0
  213. synth_ai/cli/deploy/core.py +5 -0
  214. synth_ai/cli/deploy/errors.py +23 -0
  215. synth_ai/cli/deploy/validation.py +5 -0
  216. synth_ai/cli/eval/__init__.py +36 -0
  217. synth_ai/cli/eval/core.py +5 -0
  218. synth_ai/cli/eval/errors.py +31 -0
  219. synth_ai/cli/eval/validation.py +5 -0
  220. synth_ai/cli/filter/__init__.py +28 -0
  221. synth_ai/cli/filter/core.py +5 -0
  222. synth_ai/cli/filter/errors.py +23 -0
  223. synth_ai/cli/filter/validation.py +5 -0
  224. synth_ai/cli/legacy_root_backup.py +3 -1
  225. synth_ai/cli/lib/__init__.py +10 -0
  226. synth_ai/cli/lib/task_app_discovery.py +7 -0
  227. synth_ai/cli/lib/task_app_env.py +518 -0
  228. synth_ai/cli/modal_serve/__init__.py +12 -0
  229. synth_ai/cli/modal_serve/core.py +14 -0
  230. synth_ai/cli/modal_serve/errors.py +8 -0
  231. synth_ai/cli/modal_serve/validation.py +11 -0
  232. synth_ai/cli/recent.py +2 -1
  233. synth_ai/cli/serve/__init__.py +12 -0
  234. synth_ai/cli/serve/core.py +14 -0
  235. synth_ai/cli/serve/errors.py +8 -0
  236. synth_ai/cli/serve/validation.py +11 -0
  237. synth_ai/cli/setup.py +21 -0
  238. synth_ai/cli/status.py +7 -126
  239. synth_ai/cli/task_app_deploy.py +7 -0
  240. synth_ai/cli/task_app_list.py +25 -0
  241. synth_ai/cli/task_app_modal_serve.py +11 -0
  242. synth_ai/cli/task_app_serve.py +11 -0
  243. synth_ai/cli/task_apps.py +110 -1499
  244. synth_ai/cli/traces.py +1 -1
  245. synth_ai/cli/train/__init__.py +12 -0
  246. synth_ai/cli/train/core.py +21 -0
  247. synth_ai/cli/train/errors.py +8 -0
  248. synth_ai/cli/train/validation.py +24 -0
  249. synth_ai/cli/train.py +5 -0
  250. synth_ai/cli/turso.py +1 -1
  251. synth_ai/cli/watch.py +1 -1
  252. synth_ai/demos/__init__.py +10 -0
  253. synth_ai/demos/core/__init__.py +28 -1
  254. synth_ai/demos/crafter/__init__.py +1 -0
  255. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  256. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  257. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  258. synth_ai/demos/demo_registry.py +176 -0
  259. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  260. synth_ai/demos/math/__init__.py +1 -0
  261. synth_ai/demos/math/_common.py +16 -0
  262. synth_ai/demos/math/app.py +38 -0
  263. synth_ai/demos/math/config.toml +76 -0
  264. synth_ai/demos/math/deploy_modal.py +54 -0
  265. synth_ai/demos/math/modal_task_app.py +702 -0
  266. synth_ai/demos/math/task_app_entry.py +51 -0
  267. synth_ai/environments/environment/core.py +7 -1
  268. synth_ai/environments/examples/bandit/engine.py +0 -1
  269. synth_ai/environments/examples/bandit/environment.py +0 -1
  270. synth_ai/environments/examples/red/engine.py +33 -12
  271. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  272. synth_ai/environments/examples/red/environment.py +26 -0
  273. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  274. synth_ai/environments/examples/wordle/environment.py +0 -1
  275. synth_ai/evals/base.py +16 -5
  276. synth_ai/evals/client.py +1 -1
  277. synth_ai/http.py +8 -22
  278. synth_ai/inference/client.py +1 -1
  279. synth_ai/judge_schemas.py +4 -5
  280. synth_ai/learning/client.py +1 -1
  281. synth_ai/learning/health.py +1 -1
  282. synth_ai/learning/jobs.py +1 -1
  283. synth_ai/learning/rl/client.py +4 -2
  284. synth_ai/learning/rl/env_keys.py +1 -1
  285. synth_ai/learning/rl/secrets.py +1 -1
  286. synth_ai/learning/sft/client.py +1 -1
  287. synth_ai/learning/sft/data.py +407 -4
  288. synth_ai/learning/validators.py +4 -1
  289. synth_ai/streaming/__init__.py +29 -0
  290. synth_ai/streaming/config.py +94 -0
  291. synth_ai/streaming/handlers.py +469 -0
  292. synth_ai/streaming/streamer.py +301 -0
  293. synth_ai/streaming/types.py +95 -0
  294. synth_ai/task/apps/__init__.py +4 -2
  295. synth_ai/task/config.py +6 -4
  296. synth_ai/task/rubrics/__init__.py +1 -2
  297. synth_ai/task/rubrics/loaders.py +14 -10
  298. synth_ai/task/rubrics.py +219 -0
  299. synth_ai/task/trace_correlation_helpers.py +24 -11
  300. synth_ai/task/tracing_utils.py +14 -3
  301. synth_ai/task/validators.py +0 -1
  302. synth_ai/tracing_v3/abstractions.py +3 -3
  303. synth_ai/tracing_v3/config.py +15 -13
  304. synth_ai/tracing_v3/constants.py +21 -0
  305. synth_ai/tracing_v3/db_config.py +3 -1
  306. synth_ai/tracing_v3/decorators.py +10 -7
  307. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  308. synth_ai/tracing_v3/migration_helper.py +1 -2
  309. synth_ai/tracing_v3/session_tracer.py +7 -7
  310. synth_ai/tracing_v3/storage/base.py +29 -29
  311. synth_ai/tracing_v3/storage/config.py +3 -3
  312. synth_ai/tracing_v3/turso/daemon.py +8 -9
  313. synth_ai/tracing_v3/turso/native_manager.py +80 -72
  314. synth_ai/tracing_v3/utils.py +2 -2
  315. synth_ai/utils/__init__.py +101 -0
  316. synth_ai/utils/base_url.py +94 -0
  317. synth_ai/utils/cli.py +131 -0
  318. synth_ai/utils/env.py +294 -0
  319. synth_ai/utils/http.py +172 -0
  320. synth_ai/utils/modal.py +308 -0
  321. synth_ai/utils/process.py +212 -0
  322. synth_ai/utils/prompts.py +39 -0
  323. synth_ai/utils/sqld.py +122 -0
  324. synth_ai/utils/task_app_discovery.py +882 -0
  325. synth_ai/utils/task_app_env.py +186 -0
  326. synth_ai/utils/task_app_state.py +318 -0
  327. synth_ai/utils/user_config.py +137 -0
  328. synth_ai/v0/config/__init__.py +1 -5
  329. synth_ai/v0/config/base_url.py +1 -7
  330. synth_ai/v0/tracing/config.py +1 -1
  331. synth_ai/v0/tracing/decorators.py +1 -1
  332. synth_ai/v0/tracing/upload.py +1 -1
  333. synth_ai/v0/tracing_v1/config.py +1 -1
  334. synth_ai/v0/tracing_v1/decorators.py +1 -1
  335. synth_ai/v0/tracing_v1/upload.py +1 -1
  336. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
  337. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
  338. synth_ai/cli/man.py +0 -106
  339. synth_ai/cli/tui.py +0 -57
  340. synth_ai/compound/cais.py +0 -0
  341. synth_ai/core/experiment.py +0 -13
  342. synth_ai/core/system.py +0 -15
  343. synth_ai/demo_registry.py +0 -295
  344. synth_ai/handshake.py +0 -109
  345. synth_ai/tui/__init__.py +0 -5
  346. synth_ai/tui/__main__.py +0 -13
  347. synth_ai/tui/cli/__init__.py +0 -1
  348. synth_ai/tui/cli/query_experiments.py +0 -164
  349. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  350. synth_ai/tui/dashboard.py +0 -906
  351. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
  352. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
  353. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
  354. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,178 @@
1
+ """Utility classes for running swe-mini environments on Morph Cloud."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import contextlib
6
+ import os
7
+ import shlex
8
+ import time
9
+ from dataclasses import dataclass, field
10
+ from typing import Any, Dict
11
+
12
+ _IMPORT_ERROR: Exception | None = None
13
+
14
+ try: # pragma: no cover - optional dependency
15
+ from morphcloud.api import MorphCloudClient
16
+ except Exception as exc: # pragma: no cover - optional dependency
17
+ MorphCloudClient = None # type: ignore[assignment]
18
+ _IMPORT_ERROR = exc
19
+
20
+
21
+ def _quote_env_var(key: str, value: str) -> str:
22
+ """Return a safe shell export statement."""
23
+ return f"export {key}={shlex.quote(value)}"
24
+
25
+
26
+ def _now() -> float:
27
+ return time.time()
28
+
29
+
30
+ @dataclass
31
+ class MorphSandboxBackend:
32
+ """Thin wrapper around Morph Cloud instances for command execution.
33
+
34
+ The API mirrors the subset consumed by :class:`MiniSweEnvironmentWrapper`:
35
+ we expose an ``execute`` method that matches the mini-swe environment shape.
36
+ """
37
+
38
+ snapshot_id: str | None = None
39
+ image_id: str | None = None
40
+ cwd: str = "/workspace"
41
+ env: Dict[str, str] | None = None
42
+ metadata: Dict[str, str] | None = None
43
+ vcpus: int = 4
44
+ memory_mb: int = 8192
45
+ disk_mb: int = 65536
46
+ startup_timeout: int = 600
47
+
48
+ _client: MorphCloudClient = field(init=False)
49
+ _instance: Any = field(init=False, default=None)
50
+ _last_exec: Dict[str, Any] = field(init=False, default_factory=dict)
51
+ _started_at: float | None = field(init=False, default=None)
52
+
53
+ def __post_init__(self) -> None:
54
+ if MorphCloudClient is None: # pragma: no cover - optional dependency
55
+ raise RuntimeError(
56
+ "morphcloud package is required for Morph environments. "
57
+ "Install with `pip install morphcloud`."
58
+ ) from _IMPORT_ERROR
59
+
60
+ api_key = os.getenv("MORPH_API_KEY", "")
61
+ if not api_key:
62
+ raise RuntimeError("Set MORPH_API_KEY before using the Morph backend.")
63
+
64
+ # Normalise metadata/env early to avoid shared references.
65
+ self.metadata = {str(k): str(v) for k, v in (self.metadata or {}).items()}
66
+ self.env = {str(k): str(v) for k, v in (self.env or {}).items()}
67
+ self.cwd = self.cwd or "/workspace"
68
+
69
+ self._client = MorphCloudClient()
70
+
71
+ # Public API -----------------------------------------------------------------
72
+
73
+ def execute(self, command: str, timeout: int | None = None) -> Dict[str, Any]:
74
+ """Execute ``command`` inside the Morph instance."""
75
+ if not command.strip():
76
+ command = "true"
77
+
78
+ instance = self._ensure_instance()
79
+
80
+ script_parts = []
81
+ for key, value in self.env.items():
82
+ script_parts.append(_quote_env_var(key, value))
83
+ if self.cwd:
84
+ script_parts.append(f"cd {shlex.quote(self.cwd)}")
85
+ script_parts.append(command)
86
+
87
+ script = " && ".join(script_parts)
88
+ if timeout:
89
+ wrapped = f"timeout {int(timeout)}s bash -lc {shlex.quote(script)}"
90
+ else:
91
+ wrapped = script
92
+
93
+ shell_cmd = f"bash -lc {shlex.quote(wrapped)}"
94
+ started = _now()
95
+ result = instance.exec(shell_cmd)
96
+ duration = _now() - started
97
+
98
+ payload = {
99
+ "output": (result.stdout or ""),
100
+ "stderr": (result.stderr or ""),
101
+ "returncode": getattr(result, "exit_code", None),
102
+ "duration": duration,
103
+ }
104
+ self._last_exec = payload
105
+ return payload
106
+
107
+ def close(self) -> None:
108
+ """Stops the Morph instance if one is running."""
109
+ instance = getattr(self, "_instance", None)
110
+ if not instance:
111
+ return
112
+ try:
113
+ instance.stop()
114
+ except Exception: # pragma: no cover - best-effort shutdown
115
+ pass
116
+ finally:
117
+ self._instance = None
118
+
119
+ # Internal helpers -----------------------------------------------------------
120
+
121
+ def _ensure_instance(self):
122
+ instance = getattr(self, "_instance", None)
123
+ if instance is not None:
124
+ return instance
125
+
126
+ snapshot_id = (
127
+ self.snapshot_id
128
+ or os.getenv("SWE_MINI_MORPH_SNAPSHOT_ID")
129
+ or os.getenv("MORPH_SNAPSHOT_ID")
130
+ )
131
+ metadata = dict(self.metadata)
132
+
133
+ if snapshot_id:
134
+ instance = self._client.instances.start(snapshot_id=snapshot_id, metadata=metadata or None)
135
+ else:
136
+ image_id = (
137
+ self.image_id
138
+ or os.getenv("SWE_MINI_MORPH_IMAGE_ID")
139
+ or os.getenv("MORPH_IMAGE_ID")
140
+ or "morphvm-minimal"
141
+ )
142
+ snapshot = self._client.snapshots.create(
143
+ image_id=image_id,
144
+ vcpus=self.vcpus,
145
+ memory=self.memory_mb,
146
+ disk_size=self.disk_mb,
147
+ )
148
+ instance = self._client.instances.start(snapshot_id=snapshot.id, metadata=metadata or None)
149
+ self.snapshot_id = snapshot.id
150
+
151
+ self._instance = instance
152
+ self._started_at = _now()
153
+ self._wait_until_ready(instance)
154
+ self._ensure_cwd(instance)
155
+ return instance
156
+
157
+ def _wait_until_ready(self, instance) -> None:
158
+ deadline = _now() + float(self.startup_timeout)
159
+ while True:
160
+ try:
161
+ instance.wait_until_ready()
162
+ break
163
+ except Exception as exc: # pragma: no cover - SDK may raise while polling
164
+ if _now() > deadline:
165
+ raise TimeoutError(f"Morph instance did not become ready within {self.startup_timeout}s") from exc
166
+ time.sleep(5.0)
167
+
168
+ def _ensure_cwd(self, instance) -> None:
169
+ if not self.cwd:
170
+ return
171
+ try:
172
+ instance.exec(f"bash -lc {shlex.quote(f'mkdir -p {self.cwd}')}")
173
+ except Exception as exc: # pragma: no cover - surface friendly error
174
+ raise RuntimeError(f"Failed to create remote workspace {self.cwd!r}: {exc}") from exc
175
+
176
+ def __del__(self) -> None: # pragma: no cover - defensive cleanup
177
+ with contextlib.suppress(Exception):
178
+ self.close()
@@ -6,7 +6,7 @@ underlying FastAPI plumbing.
6
6
 
7
7
  ## Local development
8
8
  ```bash
9
- uvx synth-ai serve grpo-crafter --port 8001
9
+ uvx synth-ai deploy --runtime uvicorn grpo-crafter --port 8001
10
10
  # Optional extras:
11
11
  # --env-file path/to/.env # load additional environment variables
12
12
  # --reload # enable uvicorn auto-reload
@@ -7,10 +7,15 @@ import logging
7
7
  import os
8
8
  import sys
9
9
  from collections.abc import Iterable, Sequence
10
+ from contextlib import suppress
10
11
  from dataclasses import dataclass
12
+ from datetime import UTC, datetime
11
13
  from pathlib import Path
12
14
  from typing import Any
13
15
 
16
+ from fastapi import HTTPException
17
+ from pydantic import BaseModel
18
+
14
19
  from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
15
20
  from synth_ai.task.contracts import RolloutMetrics, RolloutMode, RolloutRequest, RolloutResponse, TaskInfo
16
21
  from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
@@ -614,16 +619,14 @@ def _coerce_math_to_crafter(request: RolloutRequest) -> RolloutRequest:
614
619
 
615
620
  coerced = request.model_copy(update={"env": updated_env, "policy": updated_policy, "ops": ops_override})
616
621
 
617
- try:
622
+ with suppress(Exception):
618
623
  print(
619
624
  "[rollout] remapped math request -> crafter "
620
625
  f"(env={request.env.env_name!r}→{coerced.env.env_name!r}, "
621
626
  f"policy={request.policy.policy_name!r}→{coerced.policy.policy_name!r})",
622
627
  flush=True,
623
628
  )
624
- except Exception:
625
- pass
626
- try:
629
+ with suppress(Exception):
627
630
  logger.info(
628
631
  "ROLLOUT_ALIAS: remapped math env/policy to crafter (env=%s→%s, policy=%s→%s)",
629
632
  request.env.env_name,
@@ -631,8 +634,6 @@ def _coerce_math_to_crafter(request: RolloutRequest) -> RolloutRequest:
631
634
  request.policy.policy_name,
632
635
  coerced.policy.policy_name,
633
636
  )
634
- except Exception:
635
- pass
636
637
 
637
638
  return coerced
638
639
 
@@ -654,12 +655,20 @@ def _resolve_trace_correlation_id(policy_cfg: dict[str, Any], mode: Any = None)
654
655
  if stripped:
655
656
  return stripped
656
657
 
657
- return extract_trace_correlation_id(policy_cfg.get("inference_url"), mode=mode)
658
+ return extract_trace_correlation_id(policy_cfg.get("inference_url"))
658
659
 
659
660
 
660
661
  async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
661
662
  request = _coerce_math_to_crafter(request)
662
663
 
664
+ record_cfg = request.record.model_copy(
665
+ update={
666
+ "return_trace": True,
667
+ "trace_format": "structured",
668
+ }
669
+ )
670
+ request = request.model_copy(update={"record": record_cfg})
671
+
663
672
  policy_cfg = dict(request.policy.config or {})
664
673
  logger.info(
665
674
  "ROLLOUT_EXEC: incoming policy config keys=%s inference_url=%s run_id=%s mode=%s",
@@ -803,11 +812,38 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
803
812
  trace_correlation_id,
804
813
  )
805
814
  data = legacy_response.model_dump()
815
+ logger.debug(
816
+ "ROLLOUT_EXEC: legacy response keys=%s has_trace=%s",
817
+ sorted(data.keys()),
818
+ bool(data.get("trace")),
819
+ )
806
820
  metrics = data.get("metrics", {}) or {}
807
821
  metrics.setdefault("outcome_score", None)
808
822
  metrics.setdefault("events_score", None)
809
823
  metrics.setdefault("details", {})
810
824
  data["metrics"] = metrics
825
+
826
+ if data.get("trace") is None:
827
+ legacy_trace = getattr(legacy_response, "trace", None)
828
+ if legacy_trace is not None:
829
+ data["trace"] = legacy_trace
830
+ else:
831
+ tracer_factory = getattr(fastapi_request.app.state, "session_tracer_factory", None)
832
+ if callable(tracer_factory):
833
+ tracer = tracer_factory()
834
+ logger.debug("ROLLOUT_EXEC: trace backfill factory=%s", type(tracer))
835
+ if isinstance(tracer, SessionTracer):
836
+ try:
837
+ await tracer.initialize()
838
+ if tracer.db is not None:
839
+ trace_row = await tracer.db.get_session_trace(request.run_id)
840
+ if trace_row is not None:
841
+ data["trace"] = trace_row
842
+ except Exception as exc:
843
+ logger.warning("TRACE_BACKFILL_FAIL: %s", exc)
844
+ finally:
845
+ with suppress(Exception):
846
+ await tracer.close()
811
847
 
812
848
  # Add trace_correlation_id at TOP-LEVEL (REQUIRED for RL training pipeline)
813
849
  # Use fallback if somehow missing
@@ -823,12 +859,30 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
823
859
  if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
824
860
  existing_meta.setdefault("inference_url", policy_cfg["inference_url"])
825
861
  data["pipeline_metadata"] = existing_meta
826
-
862
+
827
863
  # Add trace_correlation_id to each trajectory (required for RL training pipeline)
828
864
  if "trajectories" in data:
865
+ normalized_trajs: list[dict[str, Any]] = []
829
866
  for traj in data.get("trajectories", []):
830
- if isinstance(traj, dict):
831
- traj["trace_correlation_id"] = final_cid
867
+ if isinstance(traj, BaseModel):
868
+ traj_dict = traj.model_dump()
869
+ elif isinstance(traj, dict):
870
+ traj_dict = dict(traj)
871
+ else:
872
+ continue
873
+ traj_dict["trace_correlation_id"] = final_cid
874
+ if not traj_dict.get("inference_url"):
875
+ inferred_url = policy_cfg.get("inference_url")
876
+ if inferred_url:
877
+ traj_dict["inference_url"] = inferred_url
878
+ normalized_trajs.append(traj_dict)
879
+ if normalized_trajs:
880
+ data["trajectories"] = normalized_trajs
881
+ logger.info(
882
+ "ROLLOUT_EXEC: normalized trajectory sample run_id=%s inference_url=%s",
883
+ request.run_id,
884
+ normalized_trajs[0].get("inference_url") if normalized_trajs else None,
885
+ )
832
886
  logger.info(
833
887
  "ROLLOUT_EXEC: final pipeline metadata run_id=%s metadata=%s",
834
888
  request.run_id,
@@ -847,6 +901,12 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
847
901
  request.run_id,
848
902
  existing_meta,
849
903
  )
904
+
905
+ if data.get("trace") is None:
906
+ raise HTTPException(
907
+ status_code=500,
908
+ detail="trace_payload_missing: task app did not emit a SessionTrace",
909
+ )
850
910
 
851
911
  # ASSERTION: Verify trace_correlation_id is present in response at all required levels
852
912
  assert "trace_correlation_id" in data, (
@@ -3,7 +3,7 @@
3
3
  This module now delegates to the TaskAppConfig defined in the colocated example at
4
4
  `examples/task_apps/crafter/task_app/grpo_crafter.py`. It is kept for legacy usage
5
5
  (running the file directly or targeting `fastapi_app` from external tooling). Prefer using
6
- `uvx synth-ai serve grpo-crafter` for local development and testing.
6
+ `uvx synth-ai deploy --runtime uvicorn grpo-crafter` for local development and testing.
7
7
  """
8
8
 
9
9
  from __future__ import annotations
@@ -59,6 +59,13 @@ class CrafterPolicy(Policy):
59
59
  self.trajectory_history: list[dict[str, Any]] = [] # env/policy step records
60
60
 
61
61
  async def initialize(self, config: dict[str, Any]) -> None:
62
+ # DEBUG: Log the incoming config
63
+ import logging
64
+ _logger = logging.getLogger(__name__)
65
+ _logger.debug(f"🔊 [POLICY_INIT] Received config keys: {list(config.keys())}")
66
+ _logger.debug(f"🔊 [POLICY_INIT] use_vision in config: {'use_vision' in config}, value: {config.get('use_vision')}")
67
+ _logger.debug(f"🔊 [POLICY_INIT] image_only_mode in config: {'image_only_mode' in config}, value: {config.get('image_only_mode')}")
68
+
62
69
  if "inference_url" in config:
63
70
  self.inference_url = config["inference_url"]
64
71
  if "model" in config:
@@ -67,6 +74,7 @@ class CrafterPolicy(Policy):
67
74
  self.use_tools = bool(config["use_tools"])
68
75
  if "use_vision" in config:
69
76
  self.use_vision = bool(config["use_vision"])
77
+ _logger.debug(f"🔊 [POLICY_INIT] Set use_vision={self.use_vision} from config")
70
78
  if "image_only_mode" in config:
71
79
  self.image_only_mode = bool(config["image_only_mode"])
72
80
  # If image_only_mode is enabled, automatically enable vision
@@ -97,6 +105,9 @@ class CrafterPolicy(Policy):
97
105
  self.history_messages = []
98
106
  self.turn_index = 0
99
107
  self.trajectory_history = []
108
+
109
+ # DEBUG: Log final state
110
+ _logger.debug(f"🔊 [POLICY_INIT] FINAL STATE: use_vision={self.use_vision}, image_only_mode={self.image_only_mode}, model={self.model}")
100
111
 
101
112
  def _append_user_observation(self, observation_text: str) -> None:
102
113
  self.history_messages.append({"role": "user", "content": observation_text})
@@ -131,10 +142,36 @@ class CrafterPolicy(Policy):
131
142
  history=history,
132
143
  turn=turn,
133
144
  image_parts=image_parts,
145
+ image_only_mode=self.image_only_mode,
134
146
  )
147
+
148
+ # DEBUG: Log message structure
149
+ import logging
150
+ _logger = logging.getLogger(__name__)
151
+ _logger.debug(f"🔊 [BUILD_REQUEST] Built {len(messages)} messages")
152
+ for idx, msg in enumerate(messages):
153
+ role = msg.get("role")
154
+ content = msg.get("content")
155
+ if isinstance(content, list):
156
+ _logger.debug(f"🔊 [BUILD_REQUEST] Message[{idx}] role={role}, content=list[{len(content)}]")
157
+ for part_idx, part in enumerate(content):
158
+ if isinstance(part, dict):
159
+ part_type = part.get("type")
160
+ _logger.debug(f"🔊 [BUILD_REQUEST] Part[{part_idx}]: type={part_type}")
161
+ else:
162
+ content_len = len(str(content)) if content else 0
163
+ _logger.debug(f"🔊 [BUILD_REQUEST] Message[{idx}] role={role}, content_len={content_len}")
164
+
135
165
  payload: dict[str, Any] = {
136
166
  "messages": messages,
137
167
  }
168
+
169
+ # DEBUG: Verify messages are in payload correctly
170
+ _logger.debug(f"🔊 [BUILD_REQUEST_PAYLOAD] Created payload with {len(payload['messages'])} messages")
171
+ for idx, msg in enumerate(payload["messages"]):
172
+ content = msg.get("content")
173
+ _logger.debug(f"🔊 [BUILD_REQUEST_PAYLOAD] Payload message[{idx}]: type={type(content).__name__}, is_list={isinstance(content, list)}, len={len(content) if isinstance(content, list) else len(str(content)) if content else 0}")
174
+
138
175
  if self.model is not None:
139
176
  payload["model"] = self.model
140
177
  # Thinking controls
@@ -160,6 +197,8 @@ class CrafterPolicy(Policy):
160
197
  if self.use_tools:
161
198
  payload["tools"] = TOOLS_SCHEMA
162
199
  payload["tool_choice"] = "required"
200
+ payload["function_call"] = {"name": "interact_many"}
201
+ payload["parallel_tool_calls"] = False
163
202
  # Ensure the inference server injects family-specific stop sequences
164
203
  # to terminate immediately after the first tool call for compliance.
165
204
  payload["stop_after_tool_calls"] = 1
@@ -170,13 +209,7 @@ class CrafterPolicy(Policy):
170
209
  response: dict[str, Any],
171
210
  use_tools: bool = True,
172
211
  ) -> list[dict[str, Any]]:
173
- """Turn an inference response into environment tool calls.
174
-
175
- - If tools were used, expect tool_calls-compatible output and forward as-is
176
- in our simple JSON format: {"tool_name": str, "arguments": {...}}.
177
- - If no tools, parse plain-text actions using CrafterReActAgent parser and
178
- wrap them into a single interact_many tool call.
179
- """
212
+ """Turn an inference response into environment tool calls."""
180
213
  # First check if we got actual tool calls
181
214
  choices = response.get("choices", [])
182
215
  tool_calls: list[dict[str, Any]] = []
@@ -235,24 +268,6 @@ class CrafterPolicy(Policy):
235
268
  normalized.append(tc)
236
269
  return normalized
237
270
 
238
- # Otherwise, parse plain text content for actions
239
- text = ""
240
- for choice in choices:
241
- msg = choice.get("message", {})
242
- content = msg.get("content", "")
243
- if content:
244
- text = content
245
- break
246
-
247
- if text:
248
- # Try to parse actions from the text
249
- from .shared import parse_actions
250
-
251
- actions = parse_actions(text)
252
- if actions:
253
- # Wrap actions in interact_many tool call
254
- return [{"tool_name": "interact_many", "arguments": {"actions": actions}}]
255
-
256
271
  # No actions found
257
272
  return []
258
273
 
@@ -360,7 +375,18 @@ class CrafterPolicy(Policy):
360
375
  raw_candidate = metadata.get("raw_observation")
361
376
  if isinstance(raw_candidate, dict):
362
377
  raw_observation = raw_candidate
378
+
379
+ # DEBUG: Log image extraction
380
+ import logging
381
+ _logger = logging.getLogger(__name__)
382
+ _logger.debug(f"🔊 [POLICY] use_vision={self.use_vision}, has_raw_obs={raw_observation is not None}")
383
+ if raw_observation:
384
+ obs = raw_observation.get("observation", raw_observation)
385
+ data_url = obs.get("observation_image_data_url") if isinstance(obs, dict) else None
386
+ _logger.debug(f"🔊 [POLICY] has_data_url={data_url is not None}, url_preview={data_url[:50] if data_url else 'NONE'}...")
387
+
363
388
  image_parts = self._extract_image_parts(raw_observation)
389
+ _logger.debug(f"🔊 [POLICY] Extracted {len(image_parts)} image parts")
364
390
 
365
391
  payload = self.build_inference_request(
366
392
  combined_text,
@@ -368,7 +394,17 @@ class CrafterPolicy(Policy):
368
394
  turn=self.turn_index,
369
395
  image_parts=image_parts,
370
396
  )
371
- # print("Debugging only:; ", payload)
397
+
398
+ # DEBUG: Verify payload before returning
399
+ _logger.debug(f"🔊 [POLICY_STEP_RETURN] About to return payload with {len(payload.get('messages', []))} messages")
400
+ for idx, msg in enumerate(payload.get("messages", [])):
401
+ content = msg.get("content")
402
+ _logger.debug(f"🔊 [POLICY_STEP_RETURN] Return message[{idx}]: type={type(content).__name__}, is_list={isinstance(content, list)}")
403
+ if isinstance(content, list):
404
+ _logger.debug(f"🔊 [POLICY_STEP_RETURN] Content list has {len(content)} items")
405
+ # Add assertion to catch corruption early
406
+ assert len(content) > 0, f"Message content list is empty! This should contain images."
407
+
372
408
  meta_out = {
373
409
  "inference_url": self.inference_url,
374
410
  "inference_request": payload,
@@ -484,7 +520,7 @@ class CrafterPolicy(Policy):
484
520
  "claude-3", # All Claude 3 models support vision
485
521
  "gemini", # Gemini models
486
522
  "qwen-vl", # Qwen Vision-Language models
487
- "qwen2-vl", # Qwen2 VL
523
+ "qwen3-vl", # Qwen3 VL
488
524
  "pixtral", # Mistral's vision model
489
525
  "llava", # LLaVA models
490
526
  "phi-3-vision", # Microsoft Phi-3 Vision
@@ -45,8 +45,7 @@ class CrafterReActAgent:
45
45
  "Action policy:\n"
46
46
  "- Always return a single tool call: interact_many({actions: [...]})\n"
47
47
  "- Use 2–5 actions per call; prefer long movement sequences to explore.\n"
48
- "- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n"
49
- "- Do not spam the same exact sequence twice in a row—explore in varied directions.\n\n"
48
+ "- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n\n"
50
49
  "Available actions: noop, move_up, move_down, move_left, move_right, do (interact), sleep, "
51
50
  "place_stone, place_table, place_furnace, place_plant, make_wood_pickaxe, make_stone_pickaxe, "
52
51
  "make_iron_pickaxe, make_wood_sword, make_stone_sword, make_iron_sword\n"
@@ -50,20 +50,19 @@ class OpenAIClient:
50
50
  # Make a copy to avoid modifying the original
51
51
  fixed_request = request.copy()
52
52
 
53
- # Determine if target is OpenAI-compatible (OpenAI, Azure OpenAI, Groq);
54
- # strip fields those endpoints don't accept
53
+ # Determine if target is OpenAI-compatible (OpenAI, Azure OpenAI).
54
+ # Groq shares the API surface but we keep tool enforcement fields intact.
55
55
  is_openai = False
56
+ is_groq = False
56
57
  try:
57
58
  if isinstance(target_url, str):
58
59
  low = target_url.lower()
59
- is_openai = (
60
- ("openai.com" in low)
61
- or ("azure" in low and ".openai." in low)
62
- or ("groq.com" in low)
63
- or ("/openai" in low)
64
- or ("/proxy/groq" in low)
65
- or ("/proxy/openai" in low)
66
- )
60
+ if "groq.com" in low or "/proxy/groq" in low:
61
+ is_groq = True
62
+ elif ("openai.com" in low) or ("azure" in low and ".openai." in low) or (
63
+ "/proxy/openai" in low
64
+ ):
65
+ is_openai = True
67
66
  except Exception:
68
67
  is_openai = False
69
68
 
@@ -218,8 +217,20 @@ class OpenAIClient:
218
217
  # Do NOT fall back silently; surface the error so callers fail fast
219
218
  raise
220
219
 
220
+ # DEBUG: Log request BEFORE _fix_model_parameters
221
+ logger.debug(f"🔊 [OPENAI_CLIENT_PRE_FIX] Request message[1] content type: {type(request.get('messages', [])[1].get('content') if len(request.get('messages', [])) > 1 else None)}")
222
+ if len(request.get("messages", [])) > 1:
223
+ msg1_content = request["messages"][1].get("content")
224
+ logger.debug(f"🔊 [OPENAI_CLIENT_PRE_FIX] Message[1] content value: {msg1_content if not isinstance(msg1_content, list) else f'list[{len(msg1_content)}]'}")
225
+
221
226
  # Fix parameter compatibility for newer models
222
227
  processed_request = self._fix_model_parameters(request, target_url=url)
228
+
229
+ # DEBUG: Log request AFTER _fix_model_parameters
230
+ logger.debug(f"🔊 [OPENAI_CLIENT_POST_FIX] Processed message[1] content type: {type(processed_request.get('messages', [])[1].get('content') if len(processed_request.get('messages', [])) > 1 else None)}")
231
+ if len(processed_request.get("messages", [])) > 1:
232
+ msg1_content_post = processed_request["messages"][1].get("content")
233
+ logger.debug(f"🔊 [OPENAI_CLIENT_POST_FIX] Message[1] content value: {msg1_content_post if not isinstance(msg1_content_post, list) else f'list[{len(msg1_content_post)}]'}")
223
234
 
224
235
  # Log request (redact messages in production)
225
236
  logger.info(f"Inference POST target: {url}")
@@ -228,14 +239,32 @@ class OpenAIClient:
228
239
  with contextlib.suppress(Exception):
229
240
  keys_preview = sorted(processed_request.keys())
230
241
  logger.info(f"Request keys: {keys_preview}")
231
-
232
- # Final hard-guard for OpenAI: ensure unsupported field is not present
242
+ # DEBUG: Log message structure for vision debugging
243
+ if "messages" in processed_request:
244
+ msgs = processed_request["messages"]
245
+ if isinstance(msgs, list):
246
+ logger.debug(f"🔊 [OPENAI_CLIENT] Request has {len(msgs)} messages")
247
+ for idx, msg in enumerate(msgs):
248
+ if isinstance(msg, dict):
249
+ role = msg.get("role")
250
+ content = msg.get("content")
251
+ if isinstance(content, list):
252
+ logger.debug(f"🔊 [OPENAI_CLIENT] Message[{idx}] role={role}, content=list[{len(content)}]")
253
+ for part_idx, part in enumerate(content):
254
+ if isinstance(part, dict):
255
+ part_type = part.get("type")
256
+ logger.debug(f"🔊 [OPENAI_CLIENT] Part[{part_idx}]: type={part_type}")
257
+ else:
258
+ content_len = len(str(content)) if content else 0
259
+ logger.debug(f"🔊 [OPENAI_CLIENT] Message[{idx}] role={role}, content_type={type(content).__name__}, len={content_len}")
260
+
261
+ # Final hard-guard for OpenAI/Groq: drop unsupported field
233
262
  try:
234
- if "openai" in url.lower() and "stop_after_tool_calls" in processed_request:
263
+ low_url = url.lower()
264
+ if ("openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url) and "stop_after_tool_calls" in processed_request:
235
265
  processed_request.pop("stop_after_tool_calls", None)
236
- logger.info("Removed stop_after_tool_calls for OpenAI request")
266
+ logger.info("Removed stop_after_tool_calls for %s request", "Groq/OpenAI")
237
267
  # Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
238
- low_url = url.lower()
239
268
  if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
240
269
  processed_request, dict
241
270
  ):
@@ -516,47 +545,16 @@ class OpenAIClient:
516
545
  error_block.get("code") or error_block.get("type") or ""
517
546
  ).lower()
518
547
  if error_code in {"tool_use_failed", "tool_call_failed"}:
519
- logger.warning(
548
+ logger.error(
520
549
  {
521
550
  "tool_use_failed": True,
522
551
  "target": (base_url or self.base_url),
523
552
  "message": error_block.get("message") if isinstance(error_block, dict) else None,
524
553
  }
525
554
  )
526
- fallback_actions = ["move_right", "move_up", "do"]
527
- fallback_response = {
528
- "id": f"fallback-{int(time.time() * 1000)}",
529
- "object": "chat.completion",
530
- "created": int(time.time()),
531
- "model": processed_request.get("model"),
532
- "choices": [
533
- {
534
- "index": 0,
535
- "message": {
536
- "role": "assistant",
537
- "content": "",
538
- "tool_calls": [
539
- {
540
- "id": f"call_fallback_{int(time.time() * 1000)}",
541
- "type": "function",
542
- "function": {
543
- "name": "interact_many",
544
- "arguments": json.dumps(
545
- {"actions": fallback_actions}
546
- ),
547
- },
548
- }
549
- ],
550
- },
551
- "finish_reason": "tool_calls",
552
- }
553
- ],
554
- }
555
- if isinstance(response_data.get("usage"), dict):
556
- fallback_response["usage"] = response_data["usage"]
557
- if isinstance(error_block, dict):
558
- fallback_response["error"] = error_block
559
- return fallback_response
555
+ raise RuntimeError(
556
+ f"Inference 400 response (tool call failed): {error_block.get('message') if isinstance(error_block, dict) else 'Tool call failed'}"
557
+ ) from e
560
558
  # This is a different type of 400 error, don't retry
561
559
  try:
562
560
  redacted_headers = {}