synth-ai 0.2.13.dev2__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic.

Files changed (293)
  1. examples/README.md +1 -0
  2. examples/multi_step/SFT_README.md +147 -0
  3. examples/multi_step/configs/README_verilog_rl.md +77 -0
  4. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  5. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  6. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  7. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  8. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -11
  9. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  10. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  11. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  12. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  13. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  14. examples/multi_step/convert_traces_to_sft.py +84 -0
  15. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  16. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  17. examples/multi_step/readme.md +48 -0
  18. examples/multi_step/run_sft_qwen30b.sh +45 -0
  19. examples/multi_step/verilog_rl_lora.md +218 -0
  20. examples/qwen_coder/configs/coder_lora_30b.toml +3 -2
  21. examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
  22. examples/qwen_coder/configs/coder_lora_small.toml +2 -1
  23. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  24. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  25. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  26. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  27. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  28. examples/qwen_vl/QUICKSTART.md +327 -0
  29. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  30. examples/qwen_vl/README.md +154 -0
  31. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  32. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  33. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  34. examples/qwen_vl/SETUP_COMPLETE.md +275 -0
  35. examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
  36. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  37. examples/qwen_vl/__init__.py +2 -0
  38. examples/qwen_vl/collect_data_via_cli.md +423 -0
  39. examples/qwen_vl/collect_vision_traces.py +368 -0
  40. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
  41. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
  42. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
  43. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  44. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
  45. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
  46. examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
  47. examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
  48. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  49. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  50. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  51. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  52. examples/qwen_vl/run_vision_comparison.sh +62 -0
  53. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  54. examples/qwen_vl/test_image_validation.py +201 -0
  55. examples/qwen_vl/test_sft_vision_data.py +110 -0
  56. examples/rl/README.md +1 -1
  57. examples/rl/configs/eval_base_qwen.toml +17 -0
  58. examples/rl/configs/eval_rl_qwen.toml +13 -0
  59. examples/rl/configs/rl_from_base_qwen.toml +37 -0
  60. examples/rl/configs/rl_from_base_qwen17.toml +76 -0
  61. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  62. examples/rl/run_eval.py +436 -0
  63. examples/rl/run_rl_and_save.py +111 -0
  64. examples/rl/task_app/README.md +22 -0
  65. examples/rl/task_app/math_single_step.py +990 -0
  66. examples/rl/task_app/math_task_app.py +111 -0
  67. examples/sft/README.md +5 -5
  68. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
  69. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
  70. examples/sft/evaluate.py +4 -4
  71. examples/sft/export_dataset.py +7 -4
  72. examples/sft/generate_traces.py +2 -0
  73. examples/swe/task_app/README.md +1 -1
  74. examples/swe/task_app/grpo_swe_mini.py +1 -1
  75. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  76. examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
  77. examples/swe/task_app/hosted/policy_routes.py +0 -2
  78. examples/swe/task_app/hosted/rollout.py +2 -8
  79. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  80. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  81. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  82. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  83. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  84. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  85. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  86. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  87. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  88. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  89. examples/task_apps/crafter/task_app/__init__.py +3 -0
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +309 -14
  91. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +75 -4
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +55 -3
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +114 -32
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +127 -27
  97. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +156 -0
  98. examples/task_apps/enron/__init__.py +1 -0
  99. examples/task_apps/enron/filter_sft.toml +5 -0
  100. examples/task_apps/enron/tests/__init__.py +2 -0
  101. examples/task_apps/enron/tests/integration/__init__.py +2 -0
  102. examples/task_apps/enron/tests/integration/test_enron_eval.py +2 -0
  103. examples/task_apps/enron/tests/unit/__init__.py +2 -0
  104. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  105. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  106. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  107. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  108. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +2 -0
  109. examples/task_apps/pokemon_red/task_app.py +199 -6
  110. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +2 -0
  111. examples/task_apps/sokoban/filter_sft.toml +5 -0
  112. examples/task_apps/sokoban/tests/__init__.py +2 -0
  113. examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
  114. examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
  115. examples/task_apps/verilog/eval_groq_qwen32b.toml +8 -4
  116. examples/task_apps/verilog/filter_sft.toml +5 -0
  117. examples/task_apps/verilog/task_app/grpo_verilog.py +258 -23
  118. examples/task_apps/verilog/tests/__init__.py +2 -0
  119. examples/task_apps/verilog/tests/integration/__init__.py +2 -0
  120. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +2 -0
  121. examples/task_apps/verilog/tests/unit/__init__.py +2 -0
  122. examples/vlm/README.md +3 -3
  123. examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
  124. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  125. examples/vlm/filter_image_rows.py +1 -1
  126. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  127. examples/warming_up_to_rl/_utils.py +92 -0
  128. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  129. examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
  130. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
  131. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  132. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  133. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  134. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  135. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  136. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  137. examples/warming_up_to_rl/groq_test.py +2 -0
  138. examples/warming_up_to_rl/readme.md +63 -132
  139. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  140. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  141. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  142. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  143. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  144. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  145. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  146. examples/warming_up_to_rl/task_app/README.md +42 -0
  147. examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
  148. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  152. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  153. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  154. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  155. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  156. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  157. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
  158. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  159. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  160. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  161. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  162. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  163. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  164. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  165. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
  166. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  167. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  168. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  169. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  170. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  171. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  172. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  173. synth_ai/__init__.py +44 -30
  174. synth_ai/_utils/__init__.py +47 -0
  175. synth_ai/_utils/base_url.py +10 -0
  176. synth_ai/_utils/http.py +10 -0
  177. synth_ai/_utils/prompts.py +10 -0
  178. synth_ai/_utils/task_app_state.py +12 -0
  179. synth_ai/_utils/user_config.py +10 -0
  180. synth_ai/api/models/supported.py +145 -7
  181. synth_ai/api/train/__init__.py +13 -1
  182. synth_ai/api/train/cli.py +30 -7
  183. synth_ai/api/train/config_finder.py +18 -11
  184. synth_ai/api/train/env_resolver.py +13 -10
  185. synth_ai/cli/__init__.py +66 -49
  186. synth_ai/cli/_modal_wrapper.py +9 -6
  187. synth_ai/cli/_typer_patch.py +0 -2
  188. synth_ai/cli/_validate_task_app.py +22 -4
  189. synth_ai/cli/legacy_root_backup.py +3 -1
  190. synth_ai/cli/lib/__init__.py +10 -0
  191. synth_ai/cli/lib/task_app_discovery.py +7 -0
  192. synth_ai/cli/lib/task_app_env.py +518 -0
  193. synth_ai/cli/recent.py +1 -0
  194. synth_ai/cli/setup.py +266 -0
  195. synth_ai/cli/task_app_deploy.py +16 -0
  196. synth_ai/cli/task_app_list.py +25 -0
  197. synth_ai/cli/task_app_modal_serve.py +16 -0
  198. synth_ai/cli/task_app_serve.py +18 -0
  199. synth_ai/cli/task_apps.py +392 -141
  200. synth_ai/cli/train.py +18 -0
  201. synth_ai/cli/tui.py +62 -0
  202. synth_ai/demos/__init__.py +10 -0
  203. synth_ai/demos/core/__init__.py +28 -1
  204. synth_ai/demos/crafter/__init__.py +1 -0
  205. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  206. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  207. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  208. synth_ai/demos/demo_registry.py +176 -0
  209. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  210. synth_ai/demos/math/__init__.py +1 -0
  211. synth_ai/demos/math/_common.py +16 -0
  212. synth_ai/demos/math/app.py +38 -0
  213. synth_ai/demos/math/config.toml +76 -0
  214. synth_ai/demos/math/deploy_modal.py +54 -0
  215. synth_ai/demos/math/modal_task_app.py +702 -0
  216. synth_ai/demos/math/task_app_entry.py +51 -0
  217. synth_ai/environments/environment/core.py +7 -1
  218. synth_ai/environments/examples/bandit/engine.py +0 -1
  219. synth_ai/environments/examples/bandit/environment.py +0 -1
  220. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  221. synth_ai/environments/examples/verilog/engine.py +76 -10
  222. synth_ai/environments/examples/wordle/environment.py +0 -1
  223. synth_ai/evals/base.py +16 -5
  224. synth_ai/evals/client.py +1 -1
  225. synth_ai/inference/client.py +1 -1
  226. synth_ai/learning/client.py +1 -1
  227. synth_ai/learning/health.py +1 -1
  228. synth_ai/learning/jobs.py +1 -1
  229. synth_ai/learning/rl/client.py +1 -1
  230. synth_ai/learning/rl/env_keys.py +1 -1
  231. synth_ai/learning/rl/secrets.py +1 -1
  232. synth_ai/learning/sft/client.py +1 -1
  233. synth_ai/learning/sft/data.py +407 -4
  234. synth_ai/learning/validators.py +4 -1
  235. synth_ai/task/__init__.py +11 -1
  236. synth_ai/task/apps/__init__.py +5 -2
  237. synth_ai/task/config.py +259 -0
  238. synth_ai/task/contracts.py +15 -2
  239. synth_ai/task/rubrics/__init__.py +4 -2
  240. synth_ai/task/rubrics/loaders.py +27 -4
  241. synth_ai/task/rubrics/scoring.py +3 -0
  242. synth_ai/task/rubrics.py +219 -0
  243. synth_ai/task/trace_correlation_helpers.py +328 -0
  244. synth_ai/task/tracing_utils.py +14 -3
  245. synth_ai/task/validators.py +145 -2
  246. synth_ai/tracing_v3/config.py +15 -13
  247. synth_ai/tracing_v3/constants.py +21 -0
  248. synth_ai/tracing_v3/db_config.py +3 -1
  249. synth_ai/tracing_v3/decorators.py +10 -7
  250. synth_ai/tracing_v3/session_tracer.py +10 -0
  251. synth_ai/tracing_v3/turso/daemon.py +2 -2
  252. synth_ai/tracing_v3/turso/native_manager.py +108 -77
  253. synth_ai/tracing_v3/utils.py +1 -1
  254. synth_ai/tui/__init__.py +5 -0
  255. synth_ai/tui/__main__.py +13 -0
  256. synth_ai/tui/cli/__init__.py +1 -0
  257. synth_ai/tui/cli/query_experiments.py +164 -0
  258. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  259. synth_ai/tui/dashboard.py +911 -0
  260. synth_ai/utils/__init__.py +101 -0
  261. synth_ai/utils/base_url.py +94 -0
  262. synth_ai/utils/cli.py +131 -0
  263. synth_ai/utils/env.py +287 -0
  264. synth_ai/utils/http.py +169 -0
  265. synth_ai/utils/modal.py +308 -0
  266. synth_ai/utils/process.py +212 -0
  267. synth_ai/utils/prompts.py +39 -0
  268. synth_ai/utils/sqld.py +122 -0
  269. synth_ai/utils/task_app_discovery.py +882 -0
  270. synth_ai/utils/task_app_env.py +186 -0
  271. synth_ai/utils/task_app_state.py +318 -0
  272. synth_ai/utils/user_config.py +137 -0
  273. synth_ai/v0/config/__init__.py +1 -5
  274. synth_ai/v0/config/base_url.py +1 -7
  275. synth_ai/v0/tracing/config.py +1 -1
  276. synth_ai/v0/tracing/decorators.py +1 -1
  277. synth_ai/v0/tracing/upload.py +1 -1
  278. synth_ai/v0/tracing_v1/config.py +1 -1
  279. synth_ai/v0/tracing_v1/decorators.py +1 -1
  280. synth_ai/v0/tracing_v1/upload.py +1 -1
  281. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
  282. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/RECORD +286 -135
  283. synth_ai/cli/man.py +0 -106
  284. synth_ai/compound/cais.py +0 -0
  285. synth_ai/core/experiment.py +0 -13
  286. synth_ai/core/system.py +0 -15
  287. synth_ai/demo_registry.py +0 -295
  288. synth_ai/handshake.py +0 -109
  289. synth_ai/http.py +0 -26
  290. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
  291. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
  292. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
  293. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
examples/qwen_vl/collect_vision_traces.py
@@ -0,0 +1,368 @@
+ #!/usr/bin/env python3
+ """
+ Collect Crafter vision traces for SFT dataset creation.
+ 
+ Supports both:
+ 1. OpenAI models (gpt-5-nano, gpt-4o-mini) via OpenAI API
+ 2. Qwen-VL models via synth-ai hosted inference
+ 
+ Traces are stored in SQLite with full multimodal messages (text + base64 images)
+ ready for export to SFT JSONL format.
+ 
+ Requirements:
+ - For OpenAI: OPENAI_API_KEY environment variable
+ - For synth-ai: SYNTH_API_KEY environment variable
+ 
+ Usage:
+     # Collect with gpt-5-nano
+     uv run python examples/qwen_vl/collect_vision_traces.py \
+         --model gpt-5-nano \
+         --provider openai \
+         --episodes 100 \
+         --max-steps 50 \
+         --output-dir traces/gpt5nano_vision
+ 
+     # Collect with Qwen2-VL via synth
+     uv run python examples/qwen_vl/collect_vision_traces.py \
+         --model Qwen/Qwen2-VL-7B-Instruct \
+         --provider synth \
+         --episodes 100 \
+         --max-steps 50 \
+         --output-dir traces/qwen2vl_vision
+ """
+ 
+ from __future__ import annotations
+ 
+ import argparse
+ import asyncio
+ import json
+ import logging
+ import os
+ from pathlib import Path
+ from typing import Any, cast
+ from uuid import uuid4
+ 
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.envs.crafter.environment import (
+     CrafterEnvironmentWrapper,
+ )
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.envs.crafter.policy import CrafterPolicy
+ from synth_ai.environments.examples.crafter_classic.environment import CrafterClassicEnvironment
+ from synth_ai.environments.examples.crafter_classic.taskset import (
+     CrafterTaskInstance,
+     CrafterTaskInstanceMetadata,
+ )
+ from synth_ai.environments.tasks.core import Impetus, Intent
+ 
+ # Try importing trace storage
+ try:
+     from synth_ai.tracing_v3.storage import create_storage
+     from synth_ai.tracing_v3.storage.config import StorageBackend, StorageConfig
+     TRACING_AVAILABLE = True
+ except ImportError:
+     print("Warning: Tracing storage not available. Traces will not be persisted.")
+     TRACING_AVAILABLE = False
+ 
+ 
+ def _get_openai_client():
+     """Get OpenAI client."""
+     from openai import OpenAI
+ 
+     api_key = os.getenv("OPENAI_API_KEY")
+     if not api_key:
+         raise RuntimeError("OPENAI_API_KEY not set")
+     return OpenAI(api_key=api_key)
+ 
+ 
+ def _default_backend_base_url() -> str:
+     raw = os.getenv("BACKEND_BASE_URL", "https://agent-learning.onrender.com/api").strip()
+     return raw if raw.endswith("/api") else f"{raw}/api"
+ 
+ 
+ def _get_synth_client():
+     """Get synth-ai inference client."""
+     from synth_ai.inference.client import InferenceClient
+ 
+     api_key = os.getenv("SYNTH_API_KEY")
+     if not api_key:
+         raise RuntimeError("SYNTH_API_KEY not set")
+     base_url = os.getenv("SYNTH_BASE_URL", _default_backend_base_url())
+     return InferenceClient(base_url=base_url, api_key=api_key)
+ 
+ 
+ def _build_task_instance(seed: int) -> CrafterTaskInstance:
+     """Create Crafter task instance."""
+     impetus = Impetus(instructions="Explore, survive, and unlock achievements.")
+     intent = Intent(
+         rubric={"goal": "Maximise Crafter achievements."},
+         gold_trajectories=None,
+         gold_state_diff={},
+     )
+     metadata = CrafterTaskInstanceMetadata(
+         difficulty="custom",
+         seed=seed,
+         num_trees_radius=0,
+         num_cows_radius=0,
+         num_hostiles_radius=0,
+     )
+     instance = CrafterTaskInstance(
+         id=uuid4(),
+         impetus=impetus,
+         intent=intent,
+         metadata=metadata,
+         is_reproducible=True,
+         initial_engine_snapshot=None,
+     )
+     setattr(instance, "config", {"seed": seed, "length": 256, "area": [64, 64]})
+     return instance
+ 
+ 
+ def _normalise_openai_request(payload: dict[str, Any], model: str, temperature: float) -> dict[str, Any]:
+     """Normalize inference request for OpenAI API."""
+     request = dict(payload)
+     request["model"] = model
+ 
+     # Remove vendor-specific knobs
+     request.pop("stop_after_tool_calls", None)
+     request.pop("thinking_mode", None)
+     request.pop("thinking_budget", None)
+ 
+     # gpt-5 models have specific requirements
+     if "gpt-5" in model.lower():
+         # gpt-5-nano only supports temperature=1 (default)
+         request.pop("temperature", None)  # Remove custom temperature
+         request.setdefault("max_completion_tokens", 512)
+         request.pop("max_tokens", None)  # Remove if present
+     else:
+         # Older models use max_tokens and support custom temperature
+         request.setdefault("temperature", temperature)
+         max_completion = request.pop("max_completion_tokens", None)
+         if max_completion is not None:
+             request["max_tokens"] = max_completion
+         else:
+             request.setdefault("max_tokens", 512)
+ 
+     return request
+ 
+ 
+ async def collect_traces(
+     model: str,
+     provider: str,
+     num_episodes: int,
+     max_steps: int,
+     seed_start: int,
+     output_dir: Path,
+     temperature: float,
+ ):
+     """Collect vision traces for SFT."""
+     # Setup tracing store
+     if not TRACING_AVAILABLE:
+         raise RuntimeError("Tracing storage not available. Cannot persist traces.")
+ 
+     output_dir.mkdir(parents=True, exist_ok=True)
+     db_path = output_dir / "rollouts.db"
+     storage_config = StorageConfig(
+         backend=StorageBackend.SQLITE,
+         connection_string=f"sqlite+aiosqlite:///{db_path}",
+     )
+     tracing_store = create_storage(storage_config)
+     await tracing_store.initialize()
+ 
+     # Setup inference client
+     if provider == "openai":
+         client = _get_openai_client()
+         inference_url = "openai://chat-completions"
+     elif provider == "synth":
+         client = _get_synth_client()
+         inference_url = "synth://inference"
+     else:
+         raise ValueError(f"Unknown provider: {provider}")
+ 
+     print(f"🎮 Collecting {num_episodes} episodes with {model}")
+     print(f" Provider: {provider}")
+     print(f" Max steps: {max_steps}")
+     print(f" Output: {output_dir}")
+     print(f" Database: {db_path}")
+     print()
+ 
+     total_steps = 0
+     total_achievements = 0
+ 
+     for episode_id in range(num_episodes):
+         seed = seed_start + episode_id
+ 
+         # Build task instance
+         task_instance = _build_task_instance(seed)
+         env = CrafterClassicEnvironment(task_instance)
+         wrapper = CrafterEnvironmentWrapper(env, seed=seed)
+ 
+         # Initialize policy (vision auto-detected from model name)
+         policy = CrafterPolicy(inference_url=inference_url, model=model)
+         await policy.initialize({
+             "use_tools": True,
+             "model": model,
+             "temperature": temperature,
+             "max_tokens": 512,
+         })
+ 
+         observation_packet = await wrapper.initialize()
+ 
+         steps_taken = 0
+         achievements = set()
+ 
+         # Run episode
+         for step_idx in range(max_steps):
+             obs_dict = observation_packet.get("observation")
+             if not isinstance(obs_dict, dict):
+                 break
+ 
+             # Format observation
+             obs_text = policy._format_observation_for_llm(observation_packet)  # noqa: SLF001
+ 
+             # Get tool calls from policy
+             tool_calls, meta = await policy.step(
+                 observation_text=obs_text,
+                 metadata={"raw_observation": observation_packet},
+             )
+             if "inference_request" not in meta:
+                 break
+ 
+             inference_request = meta["inference_request"]
+ 
+             # Call inference
+             if provider == "openai":
+                 normalized_request = _normalise_openai_request(
+                     inference_request,
+                     model=model,
+                     temperature=temperature,
+                 )
+                 response = client.chat.completions.create(**normalized_request)
+                 response_dict = response.model_dump()
+             else:  # synth
+                 response_dict = await client.create_chat_completion(
+                     model=model,
+                     messages=inference_request["messages"],
+                     temperature=temperature,
+                     max_tokens=512,
+                     tools=inference_request.get("tools"),
+                 )
+ 
+             # Parse tool calls
+             assistant_tool_calls = CrafterPolicy.parse_response_to_tool_calls(
+                 response_dict,
+                 use_tools=policy.use_tools,
+             )
+             if not assistant_tool_calls:
+                 break
+ 
+             # Store trace
+             assistant_message = response_dict["choices"][0].get("message", {})
+             trace_messages = inference_request["messages"] + [assistant_message]
+ 
+             tracing_store_any = cast(Any, tracing_store)
+             if hasattr(tracing_store_any, "store_trace"):
+                 await tracing_store_any.store_trace(
+                     session_id=f"ep{episode_id:04d}",
+                     step=step_idx,
+                     messages=trace_messages,
+                     model=model,
+                     metadata={
+                         "seed": seed,
+                         "has_image": policy.use_vision,
+                         "provider": provider,
+                     },
+                 )
+             else:
+                 logging.warning(
+                     "Tracing backend does not expose store_trace(); skipping persistence for episode %s",
+                     episode_id,
+                 )
+ 
+             # Execute action
+             assistant_text = assistant_message.get("content")
+             env_response = await wrapper.step(assistant_tool_calls)
+             if not isinstance(env_response, dict):
+                 break
+ 
+             # Update policy history
+             policy._append_assistant_turn(  # noqa: SLF001
+                 assistant_text,
+                 assistant_tool_calls,
+                 env_response,
+             )
+ 
+             steps_taken += 1
+ 
+             # Track achievements
+             obs = env_response.get("observation", {})
+             ach_status = obs.get("achievements_status", {})
+             for name, unlocked in ach_status.items():
+                 if unlocked:
+                     achievements.add(name)
+ 
+             if env_response.get("done"):
+                 break
+             observation_packet = env_response
+ 
+         await wrapper.terminate()
+ 
+         total_steps += steps_taken
+         total_achievements += len(achievements)
+ 
+         print(
+             f"✓ Episode {episode_id:3d} (seed={seed}): {steps_taken} steps, "
+             f"{len(achievements)} achievements"
+         )
+ 
+     print()
+     print(f"✅ Collection complete!")
+     print(f" Total episodes: {num_episodes}")
+     print(f" Total steps: {total_steps}")
+     print(f" Avg achievements: {total_achievements / num_episodes:.2f}")
+     print(f" Database: {db_path}")
+     print()
+     print("Next steps:")
+     print(" 1. Export traces to SFT JSONL format")
+     print(" 2. Split into train/val datasets")
+     print(" 3. Train VLM with LoRA")
+ 
+     return db_path
+ 
+ 
+ async def main() -> None:
+     parser = argparse.ArgumentParser(description=__doc__)
+     parser.add_argument(
+         "--model",
+         required=True,
+         help="Model name (e.g., gpt-5-nano, Qwen/Qwen2-VL-7B-Instruct)",
+     )
+     parser.add_argument(
+         "--provider",
+         choices=["openai", "synth"],
+         required=True,
+         help="Inference provider",
+     )
+     parser.add_argument("--episodes", type=int, default=100, help="Number of episodes")
+     parser.add_argument("--max-steps", type=int, default=50, help="Max steps per episode")
+     parser.add_argument("--seed-start", type=int, default=0, help="Starting seed")
+     parser.add_argument("--temperature", type=float, default=0.7, help="Sampling temperature")
+     parser.add_argument(
+         "--output-dir",
+         type=Path,
+         default=Path("traces/vision_traces"),
+         help="Output directory for traces",
+     )
+     args = parser.parse_args()
+ 
+     await collect_traces(
+         model=args.model,
+         provider=args.provider,
+         num_episodes=args.episodes,
+         max_steps=args.max_steps,
+         seed_start=args.seed_start,
+         output_dir=args.output_dir,
+         temperature=args.temperature,
+     )
+ 
+ 
+ if __name__ == "__main__":
+     asyncio.run(main())
examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml
@@ -0,0 +1,127 @@
+ # Crafter RL with Vision - Qwen3-VL-4B
+ #
+ # This configuration runs online RL (GRPO/GSPO) with a vision-language model
+ # using the same Crafter task app that generates image observations for SFT data.
+ #
+ # Model: Qwen/Qwen3-VL-4B (smaller, faster for testing)
+ # Task App: grpo-crafter-task-app (Modal deployed, supports vision)
+ # Policy: crafter-react with use_vision=true, image_only_mode=true
+ 
+ [algorithm]
+ type = "online"
+ method = "policy_gradient"
+ variety = "gspo"
+ 
+ [services]
+ # Replace with the Modal URL printed by `uvx synth-ai modal-serve grpo-crafter`
+ task_url = "https://YOUR-MODAL-TASK-APP.modal.run"
+ 
+ [compute]
+ gpu_type = "H200"
+ gpu_count = 2
+ 
+ [topology]
+ type = "single_node_split"
+ gpus_for_vllm = 1
+ gpus_for_training = 1
+ gpus_for_ref = 0
+ tensor_parallel = 1
+ 
+ [vllm]
+ tensor_parallel_size = 1
+ max_model_len = 4096
+ # Vision-specific settings
+ limit_mm_per_prompt = { "image": 1 } # Max 1 image per prompt
+ 
+ [reference]
+ placement = "none"
+ 
+ [model]
+ base = "Qwen/Qwen3-VL-4B-Instruct"
+ trainer_mode = "lora"
+ label = "crafter-rl-vision-qwen3vl4b"
+ supports_vision = true # Enable vision support
+ 
+ [lora]
+ r = 16
+ alpha = 32
+ dropout = 0.05
+ target_modules = ["all-linear"]
+ # Note: will automatically include mm_projector for vision models
+ 
+ [rollout]
+ env_name = "crafter"
+ max_turns = 10 # 10 steps per episode for faster testing
+ episodes_per_batch = 2
+ policy_name = "crafter-react"
+ max_concurrent_rollouts = 4 # Lower for vision models (memory)
+ batches_per_step = 2
+ ops = ["agent", "env"]
+ 
+ [rollout.env_config]
+ difficulty = "easy"
+ 
+ [rollout.env_config.step_rewards]
+ enabled = true
+ mode = "decision_stepwise"
+ strategy = "consistent"
+ indicator_lambda = 1.0
+ step_beta = 0.0
+ 
+ [rollout.policy_config]
+ # Vision-specific policy settings
+ use_vision = true # Enable vision input
+ image_only_mode = true # Use only images, no text observations
+ temperature = 0.6 # Slightly higher for exploration
+ top_p = 0.95
+ max_tokens = 512
+ max_llm_calls = 10
+ 
+ [evaluation]
+ instances = 8 # Lower for faster vision evals
+ every_n_iters = 5
+ seeds = [0, 1, 2, 3, 4, 5, 6, 7]
+ 
+ [training]
+ num_epochs = 1
+ iterations_per_epoch = 3 # Shorter for integration test
+ gradient_accumulation_steps = 2
+ max_accumulated_minibatch = 1
+ max_turns = 10
+ batch_size = 2 # Smaller for vision models
+ group_size = 2
+ learning_rate = 5e-5
+ log_interval = 1
+ weight_sync_interval = 1
+ event_rewards_kind = "unique"
+ async_semaphore_max = 2 # Lower concurrency for vision
+ 
+ # Enable dense decision rewards
+ step_rewards_enabled = true
+ step_rewards_mode = "decision_stepwise"
+ step_rewards_indicator_lambda = 1.0
+ step_rewards_beta = 0.0
+ step_rewards_strategy = "consistent"
+ 
+ # Vision-specific training settings
+ max_images_per_message = 1 # Limit images for memory
+ supports_vision = true # Enable vision training path
+ 
+ [training.weight_sync]
+ enable = true
+ targets = ["policy"]
+ mode = "direct"
+ direct = true
+ verify_every_k = 0
+ 
+ [judge]
+ type = "env" # Use environment rewards only (simpler for testing)
+ timeout_s = 30
+ 
+ [tags]
+ experiment = "crafter_rl_vision_qwen3vl4b"
+ task = "crafter_agent_vision"
+ model_size = "4b"
+ vision_enabled = true
+ image_only = true
+ 
examples/qwen_vl/configs/crafter_vlm_sft_example.toml
@@ -0,0 +1,60 @@
+ # Example Vision SFT Config for Crafter
+ # Train Qwen-VL on collected vision traces
+ 
+ [algorithm]
+ type = "offline"
+ method = "sft"
+ variety = "lora"
+ 
+ [job]
+ model = "Qwen/Qwen2-VL-7B-Instruct" # or Qwen/Qwen3-VL-8B
+ # Dataset from collect_vision_traces.py → export_to_sft.py
+ data = "traces/gpt5nano_vision/train.jsonl"
+ 
+ [compute]
+ gpu_type = "H200"
+ gpu_count = 2 # 2x H200 (282GB total)
+ nodes = 1
+ 
+ [training]
+ mode = "lora" # SFT with LoRA
+ use_qlora = true # Quantized LoRA for memory efficiency
+ 
+ [hyperparameters]
+ n_epochs = 2 # 2 epochs over collected samples
+ per_device_batch = 1 # Batch size 1 (images are memory-intensive)
+ gradient_accumulation_steps = 32
+ sequence_length = 2048 # Shorter context (images dominate memory)
+ learning_rate = 5e-06
+ warmup_ratio = 0.03
+ train_kind = "peft"
+ 
+ # LoRA config
+ lora_rank = 16
+ lora_alpha = 32
+ lora_dropout = 0.05
+ lora_target_modules = ["all-linear"] # Full linear layer adaptation
+ 
+ # Training optimizations
+ [hyperparameters.parallelism]
+ use_deepspeed = true
+ deepspeed_stage = 2
+ fsdp = false
+ bf16 = true
+ fp16 = false
+ activation_checkpointing = true
+ 
+ # Evaluation
+ evaluation_strategy = "steps"
+ eval_steps = 100
+ save_best_model_at_end = true
+ metric_for_best_model = "val.loss"
+ greater_is_better = false
+ load_best_model_at_end = true
+ 
+ [tags]
+ task = "crafter"
+ modality = "vision"
+ data_source = "collected_traces"
+ model_family = "qwen_vl"
+ 
examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml
@@ -0,0 +1,43 @@
+ # Evaluation config for gpt-4o-mini with vision
+ # Stronger teacher than gpt-5-nano, use for high-quality distillation
+ 
+ [eval]
+ model = "gpt-4o-mini-2024-07-18"
+ provider = "openai" # Use OpenAI API
+ 
+ # Task app endpoint
+ task_app_url = "https://synth-laboratories--grpo-crafter-task-app.modal.run"
+ 
+ # Vision settings (auto-detected from "gpt-4o" in model name)
+ use_vision = true
+ image_only_mode = false # Include both text + images
+ 
+ # Rollout settings
+ num_episodes = 100
+ max_steps_per_episode = 50
+ seeds = "200-299" # Different seeds for comparison
+ 
+ # Sampling parameters
+ temperature = 0.6 # Lower temperature for more consistent behavior
+ max_tokens = 512
+ 
+ # Trace collection
+ collect_traces = true
+ trace_db = "traces/gpt4o_mini_vision/rollouts.db"
+ 
+ # Tools
+ use_tools = true
+ 
+ # Parallel rollouts
+ parallel_episodes = 5
+ 
+ [task]
+ name = "crafter"
+ environment = "crafter-classic"
+ 
+ # Task-specific settings
+ [task.config]
+ seed_start = 200
+ max_episode_length = 256
+ render_size = [64, 64] # 64x64 PNG images
+ 
examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml
@@ -0,0 +1,29 @@
+ # Proper synth-ai eval config for Crafter with gpt-4o-mini vision
+ # Collects traces with images to database for synth-ai filter
+ 
+ [eval]
+ app_id = "grpo-crafter-task-app" # Modal deployed task app
+ model = "gpt-4o-mini-2024-07-18"
+ seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] # 10 episodes for test
+ max_turns = 50 # 50 steps per episode
+ concurrency = 2 # 2 parallel episodes
+ env_name = "crafter"
+ policy_name = "crafter-react"
+ trace_format = "structured" # Required for synth-ai eval
+ return_trace = true
+ 
+ [eval.env_config]
+ env_params = {max_steps_per_episode = 50}
+ 
+ [eval.policy_config]
+ provider = "openai"
+ model = "gpt-4o-mini-2024-07-18"
+ inference_url = "https://api.openai.com" # Base URL
+ # Note: Don't set temperature for gpt-4o-mini, use default
+ top_p = 0.95
+ max_tokens = 512
+ use_vision = true # Enable vision
+ image_only_mode = false # Use both text + images
+ max_llm_calls = 50
+ use_tools = true # Enable tool calling
+ 
examples/qwen_vl/configs/eval_gpt5nano_vision.toml
@@ -0,0 +1,45 @@
+ # Evaluation config for gpt-4o-mini with vision
+ # Collects vision traces for SFT training
+ # Note: gpt-5-nano doesn't support tool calling yet, use gpt-4o-mini instead
+ 
+ [eval]
+ model = "gpt-4o-mini-2024-07-18" # Changed from gpt-5-nano (no tool support)
+ provider = "openai" # Use OpenAI API
+ 
+ # Task app endpoint (local or hosted)
+ # task_app_url = "http://localhost:8000" # Local
+ task_app_url = "https://synth-laboratories--grpo-crafter-task-app.modal.run" # Hosted
+ 
+ # Vision settings (auto-detected from "gpt-5" in model name)
+ use_vision = true
+ image_only_mode = false # Include both text + images
+ 
+ # Rollout settings
+ num_episodes = 100
+ max_steps_per_episode = 50
+ seeds = "0-99" # Seeds 0 through 99
+ 
+ # Sampling parameters
+ temperature = 0.7
+ max_tokens = 512
+ 
+ # Trace collection
+ collect_traces = true
+ trace_db = "traces/gpt5nano_vision/rollouts.db"
+ 
+ # Tools
+ use_tools = true
+ 
+ # Parallel rollouts (speeds up collection)
+ parallel_episodes = 5 # Run 5 episodes in parallel
+ 
+ [task]
+ name = "crafter"
+ environment = "crafter-classic"
+ 
+ # Task-specific settings
+ [task.config]
+ seed_start = 0
+ max_episode_length = 256
+ render_size = [64, 64] # 64x64 PNG images
+ 