synth-ai 0.2.13.dev2__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (293)
  1. examples/README.md +1 -0
  2. examples/multi_step/SFT_README.md +147 -0
  3. examples/multi_step/configs/README_verilog_rl.md +77 -0
  4. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  5. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  6. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  7. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  8. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -11
  9. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  10. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  11. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  12. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  13. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  14. examples/multi_step/convert_traces_to_sft.py +84 -0
  15. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  16. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  17. examples/multi_step/readme.md +48 -0
  18. examples/multi_step/run_sft_qwen30b.sh +45 -0
  19. examples/multi_step/verilog_rl_lora.md +218 -0
  20. examples/qwen_coder/configs/coder_lora_30b.toml +3 -2
  21. examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
  22. examples/qwen_coder/configs/coder_lora_small.toml +2 -1
  23. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  24. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  25. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  26. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  27. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  28. examples/qwen_vl/QUICKSTART.md +327 -0
  29. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  30. examples/qwen_vl/README.md +154 -0
  31. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  32. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  33. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  34. examples/qwen_vl/SETUP_COMPLETE.md +275 -0
  35. examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
  36. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  37. examples/qwen_vl/__init__.py +2 -0
  38. examples/qwen_vl/collect_data_via_cli.md +423 -0
  39. examples/qwen_vl/collect_vision_traces.py +368 -0
  40. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
  41. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
  42. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
  43. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  44. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
  45. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
  46. examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
  47. examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
  48. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  49. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  50. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  51. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  52. examples/qwen_vl/run_vision_comparison.sh +62 -0
  53. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  54. examples/qwen_vl/test_image_validation.py +201 -0
  55. examples/qwen_vl/test_sft_vision_data.py +110 -0
  56. examples/rl/README.md +1 -1
  57. examples/rl/configs/eval_base_qwen.toml +17 -0
  58. examples/rl/configs/eval_rl_qwen.toml +13 -0
  59. examples/rl/configs/rl_from_base_qwen.toml +37 -0
  60. examples/rl/configs/rl_from_base_qwen17.toml +76 -0
  61. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  62. examples/rl/run_eval.py +436 -0
  63. examples/rl/run_rl_and_save.py +111 -0
  64. examples/rl/task_app/README.md +22 -0
  65. examples/rl/task_app/math_single_step.py +990 -0
  66. examples/rl/task_app/math_task_app.py +111 -0
  67. examples/sft/README.md +5 -5
  68. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
  69. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
  70. examples/sft/evaluate.py +4 -4
  71. examples/sft/export_dataset.py +7 -4
  72. examples/sft/generate_traces.py +2 -0
  73. examples/swe/task_app/README.md +1 -1
  74. examples/swe/task_app/grpo_swe_mini.py +1 -1
  75. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  76. examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
  77. examples/swe/task_app/hosted/policy_routes.py +0 -2
  78. examples/swe/task_app/hosted/rollout.py +2 -8
  79. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  80. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  81. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  82. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  83. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  84. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  85. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  86. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  87. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  88. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  89. examples/task_apps/crafter/task_app/__init__.py +3 -0
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +309 -14
  91. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +75 -4
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +55 -3
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +114 -32
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +127 -27
  97. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +156 -0
  98. examples/task_apps/enron/__init__.py +1 -0
  99. examples/task_apps/enron/filter_sft.toml +5 -0
  100. examples/task_apps/enron/tests/__init__.py +2 -0
  101. examples/task_apps/enron/tests/integration/__init__.py +2 -0
  102. examples/task_apps/enron/tests/integration/test_enron_eval.py +2 -0
  103. examples/task_apps/enron/tests/unit/__init__.py +2 -0
  104. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  105. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  106. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  107. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  108. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +2 -0
  109. examples/task_apps/pokemon_red/task_app.py +199 -6
  110. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +2 -0
  111. examples/task_apps/sokoban/filter_sft.toml +5 -0
  112. examples/task_apps/sokoban/tests/__init__.py +2 -0
  113. examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
  114. examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
  115. examples/task_apps/verilog/eval_groq_qwen32b.toml +8 -4
  116. examples/task_apps/verilog/filter_sft.toml +5 -0
  117. examples/task_apps/verilog/task_app/grpo_verilog.py +258 -23
  118. examples/task_apps/verilog/tests/__init__.py +2 -0
  119. examples/task_apps/verilog/tests/integration/__init__.py +2 -0
  120. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +2 -0
  121. examples/task_apps/verilog/tests/unit/__init__.py +2 -0
  122. examples/vlm/README.md +3 -3
  123. examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
  124. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  125. examples/vlm/filter_image_rows.py +1 -1
  126. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  127. examples/warming_up_to_rl/_utils.py +92 -0
  128. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  129. examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
  130. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
  131. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  132. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  133. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  134. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  135. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  136. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  137. examples/warming_up_to_rl/groq_test.py +2 -0
  138. examples/warming_up_to_rl/readme.md +63 -132
  139. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  140. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  141. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  142. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  143. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  144. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  145. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  146. examples/warming_up_to_rl/task_app/README.md +42 -0
  147. examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
  148. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  152. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  153. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  154. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  155. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  156. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  157. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
  158. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  159. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  160. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  161. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  162. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  163. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  164. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  165. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
  166. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  167. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  168. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  169. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  170. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  171. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  172. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  173. synth_ai/__init__.py +44 -30
  174. synth_ai/_utils/__init__.py +47 -0
  175. synth_ai/_utils/base_url.py +10 -0
  176. synth_ai/_utils/http.py +10 -0
  177. synth_ai/_utils/prompts.py +10 -0
  178. synth_ai/_utils/task_app_state.py +12 -0
  179. synth_ai/_utils/user_config.py +10 -0
  180. synth_ai/api/models/supported.py +145 -7
  181. synth_ai/api/train/__init__.py +13 -1
  182. synth_ai/api/train/cli.py +30 -7
  183. synth_ai/api/train/config_finder.py +18 -11
  184. synth_ai/api/train/env_resolver.py +13 -10
  185. synth_ai/cli/__init__.py +66 -49
  186. synth_ai/cli/_modal_wrapper.py +9 -6
  187. synth_ai/cli/_typer_patch.py +0 -2
  188. synth_ai/cli/_validate_task_app.py +22 -4
  189. synth_ai/cli/legacy_root_backup.py +3 -1
  190. synth_ai/cli/lib/__init__.py +10 -0
  191. synth_ai/cli/lib/task_app_discovery.py +7 -0
  192. synth_ai/cli/lib/task_app_env.py +518 -0
  193. synth_ai/cli/recent.py +1 -0
  194. synth_ai/cli/setup.py +266 -0
  195. synth_ai/cli/task_app_deploy.py +16 -0
  196. synth_ai/cli/task_app_list.py +25 -0
  197. synth_ai/cli/task_app_modal_serve.py +16 -0
  198. synth_ai/cli/task_app_serve.py +18 -0
  199. synth_ai/cli/task_apps.py +392 -141
  200. synth_ai/cli/train.py +18 -0
  201. synth_ai/cli/tui.py +62 -0
  202. synth_ai/demos/__init__.py +10 -0
  203. synth_ai/demos/core/__init__.py +28 -1
  204. synth_ai/demos/crafter/__init__.py +1 -0
  205. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  206. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  207. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  208. synth_ai/demos/demo_registry.py +176 -0
  209. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  210. synth_ai/demos/math/__init__.py +1 -0
  211. synth_ai/demos/math/_common.py +16 -0
  212. synth_ai/demos/math/app.py +38 -0
  213. synth_ai/demos/math/config.toml +76 -0
  214. synth_ai/demos/math/deploy_modal.py +54 -0
  215. synth_ai/demos/math/modal_task_app.py +702 -0
  216. synth_ai/demos/math/task_app_entry.py +51 -0
  217. synth_ai/environments/environment/core.py +7 -1
  218. synth_ai/environments/examples/bandit/engine.py +0 -1
  219. synth_ai/environments/examples/bandit/environment.py +0 -1
  220. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  221. synth_ai/environments/examples/verilog/engine.py +76 -10
  222. synth_ai/environments/examples/wordle/environment.py +0 -1
  223. synth_ai/evals/base.py +16 -5
  224. synth_ai/evals/client.py +1 -1
  225. synth_ai/inference/client.py +1 -1
  226. synth_ai/learning/client.py +1 -1
  227. synth_ai/learning/health.py +1 -1
  228. synth_ai/learning/jobs.py +1 -1
  229. synth_ai/learning/rl/client.py +1 -1
  230. synth_ai/learning/rl/env_keys.py +1 -1
  231. synth_ai/learning/rl/secrets.py +1 -1
  232. synth_ai/learning/sft/client.py +1 -1
  233. synth_ai/learning/sft/data.py +407 -4
  234. synth_ai/learning/validators.py +4 -1
  235. synth_ai/task/__init__.py +11 -1
  236. synth_ai/task/apps/__init__.py +5 -2
  237. synth_ai/task/config.py +259 -0
  238. synth_ai/task/contracts.py +15 -2
  239. synth_ai/task/rubrics/__init__.py +4 -2
  240. synth_ai/task/rubrics/loaders.py +27 -4
  241. synth_ai/task/rubrics/scoring.py +3 -0
  242. synth_ai/task/rubrics.py +219 -0
  243. synth_ai/task/trace_correlation_helpers.py +328 -0
  244. synth_ai/task/tracing_utils.py +14 -3
  245. synth_ai/task/validators.py +145 -2
  246. synth_ai/tracing_v3/config.py +15 -13
  247. synth_ai/tracing_v3/constants.py +21 -0
  248. synth_ai/tracing_v3/db_config.py +3 -1
  249. synth_ai/tracing_v3/decorators.py +10 -7
  250. synth_ai/tracing_v3/session_tracer.py +10 -0
  251. synth_ai/tracing_v3/turso/daemon.py +2 -2
  252. synth_ai/tracing_v3/turso/native_manager.py +108 -77
  253. synth_ai/tracing_v3/utils.py +1 -1
  254. synth_ai/tui/__init__.py +5 -0
  255. synth_ai/tui/__main__.py +13 -0
  256. synth_ai/tui/cli/__init__.py +1 -0
  257. synth_ai/tui/cli/query_experiments.py +164 -0
  258. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  259. synth_ai/tui/dashboard.py +911 -0
  260. synth_ai/utils/__init__.py +101 -0
  261. synth_ai/utils/base_url.py +94 -0
  262. synth_ai/utils/cli.py +131 -0
  263. synth_ai/utils/env.py +287 -0
  264. synth_ai/utils/http.py +169 -0
  265. synth_ai/utils/modal.py +308 -0
  266. synth_ai/utils/process.py +212 -0
  267. synth_ai/utils/prompts.py +39 -0
  268. synth_ai/utils/sqld.py +122 -0
  269. synth_ai/utils/task_app_discovery.py +882 -0
  270. synth_ai/utils/task_app_env.py +186 -0
  271. synth_ai/utils/task_app_state.py +318 -0
  272. synth_ai/utils/user_config.py +137 -0
  273. synth_ai/v0/config/__init__.py +1 -5
  274. synth_ai/v0/config/base_url.py +1 -7
  275. synth_ai/v0/tracing/config.py +1 -1
  276. synth_ai/v0/tracing/decorators.py +1 -1
  277. synth_ai/v0/tracing/upload.py +1 -1
  278. synth_ai/v0/tracing_v1/config.py +1 -1
  279. synth_ai/v0/tracing_v1/decorators.py +1 -1
  280. synth_ai/v0/tracing_v1/upload.py +1 -1
  281. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
  282. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/RECORD +286 -135
  283. synth_ai/cli/man.py +0 -106
  284. synth_ai/compound/cais.py +0 -0
  285. synth_ai/core/experiment.py +0 -13
  286. synth_ai/core/system.py +0 -15
  287. synth_ai/demo_registry.py +0 -295
  288. synth_ai/handshake.py +0 -109
  289. synth_ai/http.py +0 -26
  290. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
  291. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
  292. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
  293. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,44 @@
1
+ # Evaluation config for Qwen2-VL via synth-ai hosted inference
2
+ # Collects vision traces for SFT training
3
+
4
+ [eval]
5
+ model = "Qwen/Qwen2-VL-7B-Instruct"
6
+ provider = "synth" # Use synth-ai hosted inference
7
+
8
+ # Task app endpoint (local or hosted)
9
+ # task_app_url = "http://localhost:8000" # Local
10
+ task_app_url = "https://synth-laboratories--grpo-crafter-task-app.modal.run" # Hosted
11
+
12
+ # Vision settings (auto-detected from "qwen2-vl" in model name)
13
+ use_vision = true
14
+ image_only_mode = false # Include both text + images
15
+
16
+ # Rollout settings
17
+ num_episodes = 100
18
+ max_steps_per_episode = 50
19
+ seeds = "100-199" # Different seeds from gpt-5-nano for comparison
20
+
21
+ # Sampling parameters
22
+ temperature = 0.7
23
+ max_tokens = 512
24
+
25
+ # Trace collection
26
+ collect_traces = true
27
+ trace_db = "traces/qwen2vl_vision/rollouts.db"
28
+
29
+ # Tools
30
+ use_tools = true
31
+
32
+ # Parallel rollouts
33
+ parallel_episodes = 5
34
+
35
+ [task]
36
+ name = "crafter"
37
+ environment = "crafter-classic"
38
+
39
+ # Task-specific settings
40
+ [task.config]
41
+ seed_start = 100
42
+ max_episode_length = 256
43
+ render_size = [64, 64] # 64x64 PNG images
44
+
@@ -0,0 +1,50 @@
1
+ # Filter Qwen2-VL vision traces for SFT training
2
+ # Same settings as gpt5nano filter but for Qwen2-VL traces
3
+
4
+ [filter]
5
+ input_db = "traces/qwen2vl_vision/rollouts.db"
6
+ output_dir = "traces/qwen2vl_vision/sft"
7
+
8
+ # Quality filters
9
+ min_steps_per_episode = 5
10
+ min_achievements_per_episode = 0
11
+ max_steps_per_episode = 50
12
+
13
+ # Behavioral filters
14
+ detect_loops = true
15
+ max_repeated_actions = 5
16
+ min_unique_states = 3
17
+
18
+ # Remove episodes with errors
19
+ filter_errors = true
20
+ filter_timeouts = true
21
+
22
+ # Export format
23
+ export_format = "sft_jsonl"
24
+ include_images = true
25
+ include_metadata = true
26
+
27
+ # SFT-specific processing
28
+ [sft]
29
+ max_sequence_length = 2048
30
+ deduplicate = true
31
+ shuffle = true
32
+ require_valid_tool_calls = true
33
+ filter_empty_responses = true
34
+
35
+ # Train/val split
36
+ [split]
37
+ enabled = true
38
+ val_fraction = 0.1
39
+ random_seed = 42
40
+ stratify_by = "achievements"
41
+
42
+ train_file = "train.jsonl"
43
+ val_file = "val.jsonl"
44
+
45
+ # Statistics
46
+ [output]
47
+ save_stats = true
48
+ stats_file = "filter_stats.json"
49
+ save_filtered_episode_ids = true
50
+
@@ -0,0 +1,53 @@
1
+ # Filter vision traces for SFT training
2
+ # Applies quality filters and exports to SFT JSONL format
3
+
4
+ [filter]
5
+ input_db = "traces/gpt4o_vision_test/rollouts.db"
6
+ output_dir = "traces/gpt4o_vision_test/sft"
7
+
8
+ # Quality filters
9
+ min_steps_per_episode = 5 # Remove very short episodes
10
+ min_achievements_per_episode = 0 # Allow any achievement count (even 0)
11
+ max_steps_per_episode = 50 # Cap maximum length
12
+
13
+ # Behavioral filters
14
+ detect_loops = true # Detect if agent got stuck
15
+ max_repeated_actions = 5 # Max same action in a row
16
+ min_unique_states = 3 # Require at least 3 unique states
17
+
18
+ # Remove episodes with errors
19
+ filter_errors = true
20
+ filter_timeouts = true
21
+
22
+ # Export format
23
+ export_format = "sft_jsonl" # OpenAI-style messages format
24
+ include_images = true # Keep base64 images in messages
25
+ include_metadata = true # Keep episode/step metadata
26
+
27
+ # SFT-specific processing
28
+ [sft]
29
+ max_sequence_length = 2048 # Truncate messages if longer
30
+ deduplicate = true # Remove duplicate state-action pairs
31
+ shuffle = true # Shuffle samples for training
32
+
33
+ # Keep only high-quality tool calls
34
+ require_valid_tool_calls = true
35
+ filter_empty_responses = true
36
+
37
+ # Train/val split
38
+ [split]
39
+ enabled = true
40
+ val_fraction = 0.1
41
+ random_seed = 42
42
+ stratify_by = "achievements" # Ensure val set has similar achievement distribution
43
+
44
+ # Output file names
45
+ train_file = "train.jsonl"
46
+ val_file = "val.jsonl"
47
+
48
+ # Statistics
49
+ [output]
50
+ save_stats = true
51
+ stats_file = "filter_stats.json"
52
+ save_filtered_episode_ids = true
53
+
@@ -0,0 +1,8 @@
1
+ # Simple filter config for testing vision trace export
2
+ [filter]
3
+ db = "traces/gpt4o_vision_test/rollouts.db"
4
+ output = "traces/gpt4o_vision_test/sft/train.jsonl"
5
+
6
+ # No filters - accept all traces
7
+ limit = 100 # Max 100 examples
8
+
@@ -0,0 +1,54 @@
1
+ # SFT Training Config for Qwen3-VL-2B with Vision Data
2
+ # Test config for validating vision fine-tuning pipeline
3
+
4
+ [algorithm]
5
+ type = "offline"
6
+ method = "sft"
7
+ variety = "lora"
8
+
9
+ [job]
10
+ model = "Qwen/Qwen3-VL-2B-Instruct"
11
+ data = "examples/qwen_vl/test_data/vision_sft_test.jsonl"
12
+
13
+ [compute]
14
+ gpu_type = "H100"
15
+ gpu_count = 1
16
+ nodes = 1
17
+
18
+ [training]
19
+ mode = "lora"
20
+ use_qlora = false # Use full precision LoRA for vision
21
+
22
+ [training.validation]
23
+ enabled = false # Skip validation for quick test
24
+
25
+ [hyperparameters]
26
+ n_epochs = 2 # 2 epochs for test
27
+ train_kind = "peft"
28
+ per_device_batch = 1
29
+ gradient_accumulation_steps = 4
30
+ sequence_length = 2048 # Shorter for vision + text
31
+ learning_rate = 5e-5
32
+ warmup_ratio = 0.03
33
+ lora_rank = 16
34
+ lora_alpha = 32
35
+ lora_dropout = 0.05
36
+ lora_target_modules = ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj", "mm_projector"]
37
+
38
+ [hyperparameters.parallelism]
39
+ use_deepspeed = false
40
+ fsdp = false
41
+ bf16 = true
42
+ fp16 = false
43
+ activation_checkpointing = false
44
+
45
+ [model_config]
46
+ supports_vision = true
47
+ max_images_per_message = 1
48
+ max_model_len = 2048 # Short for test
49
+
50
+ [tags]
51
+ experiment = "test_vision_sft"
52
+ purpose = "integration_test"
53
+ model_size = "2B"
54
+ data_type = "synthetic_vision"
@@ -0,0 +1,308 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Crafter agent using OpenAI's gpt-5-nano vision model.
4
+
5
+ This demonstrates gpt-5-nano playing Crafter with image observations.
6
+ The CrafterPolicy automatically detects vision capability from the "gpt-5"
7
+ model name and includes base64-encoded PNG frames in the prompt.
8
+
9
+ Requirements:
10
+ - `OPENAI_API_KEY` environment variable
11
+ - `openai` Python package (installed via project dependencies)
12
+
13
+ Usage:
14
+ uv run python examples/qwen_vl/crafter_gpt5nano_agent.py \
15
+ --model gpt-5-nano --seeds 10 --steps 20
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import argparse
21
+ import asyncio
22
+ import base64
23
+ import json
24
+ import os
25
+ from contextlib import suppress
26
+ from pathlib import Path
27
+ from typing import Any
28
+ from uuid import uuid4
29
+
30
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.envs.crafter.environment import (
31
+ CrafterEnvironmentWrapper,
32
+ )
33
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.envs.crafter.policy import CrafterPolicy
34
+ from openai import OpenAI
35
+ from synth_ai.environments.examples.crafter_classic.environment import CrafterClassicEnvironment
36
+ from synth_ai.environments.examples.crafter_classic.taskset import (
37
+ CrafterTaskInstance,
38
+ CrafterTaskInstanceMetadata,
39
+ )
40
+ from synth_ai.environments.tasks.core import Impetus, Intent
41
+
42
+ DEFAULT_OUTPUT = Path("examples/qwen_vl/temp")
43
+ FRAME_SUBDIR = "gpt5nano_frames"
44
+
45
+
46
+ class EpisodeResult:
47
+ def __init__(self, seed: int) -> None:
48
+ self.seed = seed
49
+ self.steps_taken: int = 0
50
+ self.achievements: set[str] = set()
51
+ self.total_reward: float = 0.0
52
+ self.tool_calls: int = 0
53
+
54
+ def record_observation(self, observation: dict[str, Any]) -> None:
55
+ obs = observation.get("observation") if isinstance(observation, dict) else None
56
+ if not isinstance(obs, dict):
57
+ return
58
+ ach = obs.get("achievements_status")
59
+ if isinstance(ach, dict):
60
+ for name, unlocked in ach.items():
61
+ if unlocked:
62
+ self.achievements.add(str(name))
63
+ reward = obs.get("reward_last_step")
64
+ if isinstance(reward, int | float):
65
+ self.total_reward += float(reward)
66
+
67
+
68
+ def _ensure_client() -> OpenAI:
69
+ """Initialize OpenAI client."""
70
+ api_key = os.getenv("OPENAI_API_KEY")
71
+ if not api_key:
72
+ raise RuntimeError("OPENAI_API_KEY must be set for OpenAI calls")
73
+ return OpenAI(api_key=api_key)
74
+
75
+
76
def _build_task_instance(seed: int) -> CrafterTaskInstance:
    """Construct a reproducible Crafter task instance for the given seed.

    The metadata pins the seed and zeroes the spawn radii; the instance's
    ``config`` dict carries the runtime seed/length/area settings.
    """
    meta = CrafterTaskInstanceMetadata(
        difficulty="custom",
        seed=seed,
        num_trees_radius=0,
        num_cows_radius=0,
        num_hostiles_radius=0,
    )
    task = CrafterTaskInstance(
        id=uuid4(),
        impetus=Impetus(instructions="Explore, survive, and unlock achievements."),
        intent=Intent(
            rubric={"goal": "Maximise Crafter achievements."},
            gold_trajectories=None,
            gold_state_diff={},
        ),
        metadata=meta,
        is_reproducible=True,
        initial_engine_snapshot=None,
    )
    task.config = {"seed": seed, "length": 256, "area": [64, 64]}
    return task
101
+
102
+
103
+ def _decode_and_save_image(observation: dict[str, Any], path: Path) -> None:
104
+ """Extract and save PNG frame from observation."""
105
+ obs = observation.get("observation") if isinstance(observation, dict) else None
106
+ if not isinstance(obs, dict):
107
+ return
108
+ base64_data = obs.get("observation_image_base64")
109
+ if not isinstance(base64_data, str) or not base64_data:
110
+ return
111
+ path.parent.mkdir(parents=True, exist_ok=True)
112
+ with suppress(Exception):
113
+ path.write_bytes(base64.b64decode(base64_data))
114
+
115
+
116
+ def _normalise_openai_request(payload: dict[str, Any], model: str, temperature: float) -> dict[str, Any]:
117
+ """Normalize inference request for OpenAI API."""
118
+ request = dict(payload)
119
+ request["model"] = model
120
+
121
+ # Remove vendor-specific knobs unsupported by OpenAI
122
+ request.pop("stop_after_tool_calls", None)
123
+ request.pop("thinking_mode", None)
124
+ request.pop("thinking_budget", None)
125
+
126
+ # gpt-5 models have specific requirements
127
+ if "gpt-5" in model.lower():
128
+ # gpt-5-nano only supports temperature=1 (default)
129
+ request.pop("temperature", None) # Remove custom temperature
130
+ request.setdefault("max_completion_tokens", 512)
131
+ request.pop("max_tokens", None) # Remove if present
132
+ else:
133
+ # Older models use max_tokens and support custom temperature
134
+ request.setdefault("temperature", temperature)
135
+ max_completion = request.pop("max_completion_tokens", None)
136
+ if max_completion is not None:
137
+ request["max_tokens"] = max_completion
138
+ else:
139
+ request.setdefault("max_tokens", 512)
140
+
141
+ return request
142
+
143
+
144
async def _run_episode(
    *,
    seed: int,
    client: OpenAI,
    model: str,
    max_steps: int,
    output_dir: Path,
    temperature: float,
) -> EpisodeResult:
    """Run a single Crafter episode driven by an OpenAI vision model.

    Loops up to ``max_steps`` times: the policy formats the latest
    observation into an inference request, the OpenAI API is called, the
    returned tool calls are executed in the environment, and each frame is
    saved as a PNG under ``output_dir``. The episode ends early when the
    model returns no tool calls, the policy emits no inference request,
    or the environment reports ``done``.

    Args:
        seed: Crafter world seed; also names the frame subdirectory.
        client: Initialised OpenAI client used for chat completions.
        model: Model name; forwarded to the policy and the API request.
        max_steps: Upper bound on environment steps.
        output_dir: Root directory for saved frames.
        temperature: Sampling temperature (dropped for gpt-5 models by
            ``_normalise_openai_request``).

    Returns:
        The accumulated ``EpisodeResult`` for this seed.

    Raises:
        RuntimeError: If the environment wrapper returns a non-dict response.
    """
    task_instance = _build_task_instance(seed)
    env = CrafterClassicEnvironment(task_instance)
    wrapper = CrafterEnvironmentWrapper(env, seed=seed)

    # Policy will auto-detect vision from model name (gpt-5*)
    policy = CrafterPolicy(inference_url="openai://chat-completions", model=model)
    await policy.initialize({"use_tools": True, "model": model})

    episode_result = EpisodeResult(seed=seed)

    observation_packet = await wrapper.initialize()
    episode_result.record_observation(observation_packet)

    frames_root = output_dir / FRAME_SUBDIR / f"seed_{seed:04d}"
    # step_000 is the initial observation, before any action is taken.
    _decode_and_save_image(observation_packet, frames_root / "step_000.png")

    for step_idx in range(max_steps):
        obs_dict = observation_packet.get("observation")
        if not isinstance(obs_dict, dict):
            break

        # Format observation text (private policy API; accepted here as
        # this script is tightly coupled to CrafterPolicy)
        obs_text = policy._format_observation_for_llm(observation_packet)  # noqa: SLF001

        # Get tool calls from policy
        tool_calls, meta = await policy.step(
            observation_text=obs_text,
            metadata={"raw_observation": observation_packet},
        )
        # Without an inference request there is nothing to send to the model.
        if "inference_request" not in meta:
            break

        episode_result.steps_taken += 1
        inference_request = _normalise_openai_request(
            meta["inference_request"],
            model=model,
            temperature=temperature,
        )

        # Call OpenAI API (synchronous client call inside the async loop;
        # episodes are run sequentially so this does not contend)
        response = client.chat.completions.create(**inference_request)
        response_dict = response.model_dump()

        # Parse tool calls
        assistant_tool_calls = CrafterPolicy.parse_response_to_tool_calls(
            response_dict,
            use_tools=policy.use_tools,
        )
        if not assistant_tool_calls:
            print(
                f"Seed {seed}: no tool calls returned by model; ending episode early at step {step_idx}."
            )
            break

        episode_result.tool_calls += len(assistant_tool_calls)

        # Extract assistant message
        assistant_message = response_dict["choices"][0].get("message") or {}
        assistant_text = assistant_message.get("content")

        # Execute action in environment
        env_response = await wrapper.step(assistant_tool_calls)
        if not isinstance(env_response, dict):
            raise RuntimeError(
                f"Unexpected environment response type: {type(env_response)!r}"
            )
        episode_result.record_observation(env_response)

        # Update policy history so the next prompt includes this turn
        policy._append_assistant_turn(  # noqa: SLF001
            assistant_text,
            assistant_tool_calls,
            env_response,
        )

        # Save frame (numbered 1-based: frame N is the state after action N)
        frame_path = frames_root / f"step_{step_idx + 1:03d}.png"
        _decode_and_save_image(env_response, frame_path)

        if env_response.get("done"):
            break
        observation_packet = env_response

    await wrapper.terminate()
    return episode_result
239
+
240
+
241
async def main() -> None:
    """CLI entry point: run Crafter episodes with an OpenAI vision model.

    Parses command-line arguments, runs ``--seeds`` sequential episodes of
    up to ``--steps`` steps each, prints a per-seed progress line, and
    writes an aggregate summary to ``<output-dir>/gpt5nano_summary.json``.
    Frames for each episode are saved under ``<output-dir>/gpt5nano_frames``.

    Raises:
        RuntimeError: From ``_ensure_client`` when OPENAI_API_KEY is unset.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--model",
        default="gpt-5-nano",
        help="OpenAI model name (e.g., gpt-5-nano, gpt-4o-mini-2024-07-18)",
    )
    parser.add_argument("--seeds", type=int, default=10, help="Number of random seeds to evaluate")
    parser.add_argument("--steps", type=int, default=20, help="Max steps per seed")
    parser.add_argument("--temperature", type=float, default=0.6, help="Sampling temperature")
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=DEFAULT_OUTPUT,
        help=f"Directory for saved frames and summaries (default: {DEFAULT_OUTPUT})",
    )
    args = parser.parse_args()

    client = _ensure_client()
    results: list[EpisodeResult] = []

    seeds = list(range(args.seeds))
    print(f"Running {len(seeds)} Crafter episodes with model={args.model}")
    # Fixed banner text: plain string (was an f-string with no placeholders).
    print("Using OpenAI API\n")

    for seed in seeds:
        result = await _run_episode(
            seed=seed,
            client=client,
            model=args.model,
            max_steps=args.steps,
            output_dir=args.output_dir,
            temperature=args.temperature,
        )
        results.append(result)
        print(
            f"Seed {seed:02d}: steps={result.steps_taken}, "
            f"achievements={len(result.achievements)}, "
            f"tool_calls={result.tool_calls}, reward≈{result.total_reward:.3f}"
        )

    # max(len(results), 1) guards the means against a zero-episode run.
    summary = {
        "model": args.model,
        "provider": "openai",
        "episodes": len(results),
        "mean_steps": round(
            sum(res.steps_taken for res in results) / max(len(results), 1), 2
        ),
        "mean_achievements": round(
            sum(len(res.achievements) for res in results) / max(len(results), 1), 2
        ),
        "total_tool_calls": sum(res.tool_calls for res in results),
        "output_dir": str(args.output_dir / FRAME_SUBDIR),
    }

    args.output_dir.mkdir(parents=True, exist_ok=True)
    summary_path = args.output_dir / "gpt5nano_summary.json"
    summary_path.write_text(json.dumps(summary, indent=2), encoding="utf-8")

    print("\nSummary")
    print("-------")
    print(json.dumps(summary, indent=2))
    print(f"\nFrames saved in: {summary['output_dir']}")
304
+
305
+
306
+ if __name__ == "__main__":
307
+ asyncio.run(main())
308
+