synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (354) hide show
  1. examples/README.md +1 -0
  2. examples/analyze_semantic_words.sh +2 -2
  3. examples/blog_posts/pokemon_vl/README.md +98 -0
  4. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
  5. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  6. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  7. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
  8. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  9. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  10. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  11. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  12. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  13. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  14. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
  15. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  16. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  17. examples/multi_step/SFT_README.md +147 -0
  18. examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
  19. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
  20. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
  21. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
  22. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  23. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  24. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  25. examples/multi_step/convert_traces_to_sft.py +84 -0
  26. examples/multi_step/run_sft_qwen30b.sh +45 -0
  27. examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
  28. examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
  29. examples/qwen_coder/configs/coder_lora_small.toml +1 -2
  30. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  31. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  32. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  33. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  34. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  35. examples/qwen_vl/QUICKSTART.md +327 -0
  36. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  37. examples/qwen_vl/README.md +152 -0
  38. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  39. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  40. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  41. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  42. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  43. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  44. examples/qwen_vl/__init__.py +2 -0
  45. examples/qwen_vl/collect_data_via_cli.md +415 -0
  46. examples/qwen_vl/collect_vision_traces.py +368 -0
  47. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  48. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  49. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  50. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  51. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  52. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  53. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  54. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  55. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  56. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  57. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  58. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  59. examples/qwen_vl/run_vision_comparison.sh +61 -0
  60. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  61. examples/qwen_vl/test_image_validation.py +201 -0
  62. examples/qwen_vl/test_sft_vision_data.py +110 -0
  63. examples/rl/README.md +6 -6
  64. examples/rl/configs/eval_base_qwen.toml +17 -0
  65. examples/rl/configs/eval_rl_qwen.toml +13 -0
  66. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  67. examples/rl/configs/rl_from_base_qwen17.toml +79 -0
  68. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  69. examples/rl/run_eval.py +436 -0
  70. examples/rl/run_rl_and_save.py +111 -0
  71. examples/rl/task_app/README.md +21 -0
  72. examples/rl/task_app/math_single_step.py +990 -0
  73. examples/rl/task_app/math_task_app.py +111 -0
  74. examples/run_crafter_demo.sh +2 -2
  75. examples/sft/README.md +6 -6
  76. examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
  77. examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
  78. examples/sft/evaluate.py +2 -4
  79. examples/sft/export_dataset.py +7 -4
  80. examples/swe/task_app/README.md +33 -3
  81. examples/swe/task_app/grpo_swe_mini.py +4 -1
  82. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  83. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  84. examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
  85. examples/swe/task_app/hosted/inference/openai_client.py +4 -4
  86. examples/swe/task_app/hosted/policy_routes.py +0 -2
  87. examples/swe/task_app/hosted/rollout.py +0 -8
  88. examples/swe/task_app/morph_backend.py +178 -0
  89. examples/task_apps/crafter/task_app/README.md +1 -1
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
  91. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
  97. examples/task_apps/enron/__init__.py +1 -0
  98. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  99. examples/task_apps/math/README.md +1 -2
  100. examples/task_apps/pokemon_red/README.md +3 -4
  101. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  102. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  103. examples/task_apps/pokemon_red/task_app.py +36 -5
  104. examples/task_apps/sokoban/README.md +2 -3
  105. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  106. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  107. examples/vlm/README.md +3 -3
  108. examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
  109. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  110. examples/vlm/filter_image_rows.py +1 -1
  111. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  112. examples/warming_up_to_rl/_utils.py +92 -0
  113. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  114. examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
  115. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  116. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  117. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  118. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  119. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  120. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  121. examples/warming_up_to_rl/readme.md +63 -132
  122. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  123. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  124. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  125. examples/warming_up_to_rl/task_app/README.md +42 -0
  126. examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
  127. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  128. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  129. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  130. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  131. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  132. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  133. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  134. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  135. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  136. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  137. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  138. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  139. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  147. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  148. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  152. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  153. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
  154. synth_ai/__init__.py +44 -30
  155. synth_ai/_utils/__init__.py +47 -0
  156. synth_ai/_utils/base_url.py +10 -0
  157. synth_ai/_utils/http.py +10 -0
  158. synth_ai/_utils/prompts.py +10 -0
  159. synth_ai/_utils/task_app_state.py +12 -0
  160. synth_ai/_utils/user_config.py +10 -0
  161. synth_ai/api/models/supported.py +144 -7
  162. synth_ai/api/train/__init__.py +13 -1
  163. synth_ai/api/train/builders.py +9 -3
  164. synth_ai/api/train/cli.py +155 -17
  165. synth_ai/api/train/config_finder.py +18 -11
  166. synth_ai/api/train/configs/__init__.py +8 -1
  167. synth_ai/api/train/configs/rl.py +32 -7
  168. synth_ai/api/train/configs/sft.py +6 -2
  169. synth_ai/api/train/configs/shared.py +59 -2
  170. synth_ai/api/train/env_resolver.py +13 -10
  171. synth_ai/auth/credentials.py +119 -0
  172. synth_ai/cli/__init__.py +61 -69
  173. synth_ai/cli/_modal_wrapper.py +7 -5
  174. synth_ai/cli/_typer_patch.py +0 -2
  175. synth_ai/cli/_validate_task_app.py +22 -4
  176. synth_ai/cli/commands/__init__.py +17 -0
  177. synth_ai/cli/commands/demo/__init__.py +6 -0
  178. synth_ai/cli/commands/demo/core.py +163 -0
  179. synth_ai/cli/commands/deploy/__init__.py +23 -0
  180. synth_ai/cli/commands/deploy/core.py +614 -0
  181. synth_ai/cli/commands/deploy/errors.py +72 -0
  182. synth_ai/cli/commands/deploy/validation.py +11 -0
  183. synth_ai/cli/commands/eval/__init__.py +19 -0
  184. synth_ai/cli/commands/eval/core.py +1109 -0
  185. synth_ai/cli/commands/eval/errors.py +81 -0
  186. synth_ai/cli/commands/eval/validation.py +133 -0
  187. synth_ai/cli/commands/filter/__init__.py +12 -0
  188. synth_ai/cli/commands/filter/core.py +388 -0
  189. synth_ai/cli/commands/filter/errors.py +55 -0
  190. synth_ai/cli/commands/filter/validation.py +77 -0
  191. synth_ai/cli/commands/help/__init__.py +177 -0
  192. synth_ai/cli/commands/help/core.py +73 -0
  193. synth_ai/cli/commands/status/__init__.py +64 -0
  194. synth_ai/cli/commands/status/client.py +192 -0
  195. synth_ai/cli/commands/status/config.py +92 -0
  196. synth_ai/cli/commands/status/errors.py +20 -0
  197. synth_ai/cli/commands/status/formatters.py +164 -0
  198. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  199. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  200. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  201. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  202. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  203. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  204. synth_ai/cli/commands/status/utils.py +114 -0
  205. synth_ai/cli/commands/train/__init__.py +53 -0
  206. synth_ai/cli/commands/train/core.py +21 -0
  207. synth_ai/cli/commands/train/errors.py +117 -0
  208. synth_ai/cli/commands/train/judge_schemas.py +199 -0
  209. synth_ai/cli/commands/train/judge_validation.py +304 -0
  210. synth_ai/cli/commands/train/validation.py +443 -0
  211. synth_ai/cli/demo.py +2 -162
  212. synth_ai/cli/deploy/__init__.py +28 -0
  213. synth_ai/cli/deploy/core.py +5 -0
  214. synth_ai/cli/deploy/errors.py +23 -0
  215. synth_ai/cli/deploy/validation.py +5 -0
  216. synth_ai/cli/eval/__init__.py +36 -0
  217. synth_ai/cli/eval/core.py +5 -0
  218. synth_ai/cli/eval/errors.py +31 -0
  219. synth_ai/cli/eval/validation.py +5 -0
  220. synth_ai/cli/filter/__init__.py +28 -0
  221. synth_ai/cli/filter/core.py +5 -0
  222. synth_ai/cli/filter/errors.py +23 -0
  223. synth_ai/cli/filter/validation.py +5 -0
  224. synth_ai/cli/legacy_root_backup.py +3 -1
  225. synth_ai/cli/lib/__init__.py +10 -0
  226. synth_ai/cli/lib/task_app_discovery.py +7 -0
  227. synth_ai/cli/lib/task_app_env.py +518 -0
  228. synth_ai/cli/modal_serve/__init__.py +12 -0
  229. synth_ai/cli/modal_serve/core.py +14 -0
  230. synth_ai/cli/modal_serve/errors.py +8 -0
  231. synth_ai/cli/modal_serve/validation.py +11 -0
  232. synth_ai/cli/recent.py +2 -1
  233. synth_ai/cli/serve/__init__.py +12 -0
  234. synth_ai/cli/serve/core.py +14 -0
  235. synth_ai/cli/serve/errors.py +8 -0
  236. synth_ai/cli/serve/validation.py +11 -0
  237. synth_ai/cli/setup.py +21 -0
  238. synth_ai/cli/status.py +7 -126
  239. synth_ai/cli/task_app_deploy.py +7 -0
  240. synth_ai/cli/task_app_list.py +25 -0
  241. synth_ai/cli/task_app_modal_serve.py +11 -0
  242. synth_ai/cli/task_app_serve.py +11 -0
  243. synth_ai/cli/task_apps.py +110 -1499
  244. synth_ai/cli/traces.py +1 -1
  245. synth_ai/cli/train/__init__.py +12 -0
  246. synth_ai/cli/train/core.py +21 -0
  247. synth_ai/cli/train/errors.py +8 -0
  248. synth_ai/cli/train/validation.py +24 -0
  249. synth_ai/cli/train.py +5 -0
  250. synth_ai/cli/turso.py +1 -1
  251. synth_ai/cli/watch.py +1 -1
  252. synth_ai/demos/__init__.py +10 -0
  253. synth_ai/demos/core/__init__.py +28 -1
  254. synth_ai/demos/crafter/__init__.py +1 -0
  255. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  256. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  257. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  258. synth_ai/demos/demo_registry.py +176 -0
  259. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  260. synth_ai/demos/math/__init__.py +1 -0
  261. synth_ai/demos/math/_common.py +16 -0
  262. synth_ai/demos/math/app.py +38 -0
  263. synth_ai/demos/math/config.toml +76 -0
  264. synth_ai/demos/math/deploy_modal.py +54 -0
  265. synth_ai/demos/math/modal_task_app.py +702 -0
  266. synth_ai/demos/math/task_app_entry.py +51 -0
  267. synth_ai/environments/environment/core.py +7 -1
  268. synth_ai/environments/examples/bandit/engine.py +0 -1
  269. synth_ai/environments/examples/bandit/environment.py +0 -1
  270. synth_ai/environments/examples/red/engine.py +33 -12
  271. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  272. synth_ai/environments/examples/red/environment.py +26 -0
  273. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  274. synth_ai/environments/examples/wordle/environment.py +0 -1
  275. synth_ai/evals/base.py +16 -5
  276. synth_ai/evals/client.py +1 -1
  277. synth_ai/http.py +8 -22
  278. synth_ai/inference/client.py +1 -1
  279. synth_ai/judge_schemas.py +4 -5
  280. synth_ai/learning/client.py +1 -1
  281. synth_ai/learning/health.py +1 -1
  282. synth_ai/learning/jobs.py +1 -1
  283. synth_ai/learning/rl/client.py +4 -2
  284. synth_ai/learning/rl/env_keys.py +1 -1
  285. synth_ai/learning/rl/secrets.py +1 -1
  286. synth_ai/learning/sft/client.py +1 -1
  287. synth_ai/learning/sft/data.py +407 -4
  288. synth_ai/learning/validators.py +4 -1
  289. synth_ai/streaming/__init__.py +29 -0
  290. synth_ai/streaming/config.py +94 -0
  291. synth_ai/streaming/handlers.py +469 -0
  292. synth_ai/streaming/streamer.py +301 -0
  293. synth_ai/streaming/types.py +95 -0
  294. synth_ai/task/apps/__init__.py +4 -2
  295. synth_ai/task/config.py +6 -4
  296. synth_ai/task/rubrics/__init__.py +1 -2
  297. synth_ai/task/rubrics/loaders.py +14 -10
  298. synth_ai/task/rubrics.py +219 -0
  299. synth_ai/task/trace_correlation_helpers.py +24 -11
  300. synth_ai/task/tracing_utils.py +14 -3
  301. synth_ai/task/validators.py +0 -1
  302. synth_ai/tracing_v3/abstractions.py +3 -3
  303. synth_ai/tracing_v3/config.py +15 -13
  304. synth_ai/tracing_v3/constants.py +21 -0
  305. synth_ai/tracing_v3/db_config.py +3 -1
  306. synth_ai/tracing_v3/decorators.py +10 -7
  307. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  308. synth_ai/tracing_v3/migration_helper.py +1 -2
  309. synth_ai/tracing_v3/session_tracer.py +7 -7
  310. synth_ai/tracing_v3/storage/base.py +29 -29
  311. synth_ai/tracing_v3/storage/config.py +3 -3
  312. synth_ai/tracing_v3/turso/daemon.py +8 -9
  313. synth_ai/tracing_v3/turso/native_manager.py +80 -72
  314. synth_ai/tracing_v3/utils.py +2 -2
  315. synth_ai/utils/__init__.py +101 -0
  316. synth_ai/utils/base_url.py +94 -0
  317. synth_ai/utils/cli.py +131 -0
  318. synth_ai/utils/env.py +294 -0
  319. synth_ai/utils/http.py +172 -0
  320. synth_ai/utils/modal.py +308 -0
  321. synth_ai/utils/process.py +212 -0
  322. synth_ai/utils/prompts.py +39 -0
  323. synth_ai/utils/sqld.py +122 -0
  324. synth_ai/utils/task_app_discovery.py +882 -0
  325. synth_ai/utils/task_app_env.py +186 -0
  326. synth_ai/utils/task_app_state.py +318 -0
  327. synth_ai/utils/user_config.py +137 -0
  328. synth_ai/v0/config/__init__.py +1 -5
  329. synth_ai/v0/config/base_url.py +1 -7
  330. synth_ai/v0/tracing/config.py +1 -1
  331. synth_ai/v0/tracing/decorators.py +1 -1
  332. synth_ai/v0/tracing/upload.py +1 -1
  333. synth_ai/v0/tracing_v1/config.py +1 -1
  334. synth_ai/v0/tracing_v1/decorators.py +1 -1
  335. synth_ai/v0/tracing_v1/upload.py +1 -1
  336. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
  337. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
  338. synth_ai/cli/man.py +0 -106
  339. synth_ai/cli/tui.py +0 -57
  340. synth_ai/compound/cais.py +0 -0
  341. synth_ai/core/experiment.py +0 -13
  342. synth_ai/core/system.py +0 -15
  343. synth_ai/demo_registry.py +0 -295
  344. synth_ai/handshake.py +0 -109
  345. synth_ai/tui/__init__.py +0 -5
  346. synth_ai/tui/__main__.py +0 -13
  347. synth_ai/tui/cli/__init__.py +0 -1
  348. synth_ai/tui/cli/query_experiments.py +0 -164
  349. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  350. synth_ai/tui/dashboard.py +0 -906
  351. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
  352. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
  353. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
  354. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,368 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Collect Crafter vision traces for SFT dataset creation.
4
+
5
+ Supports both:
6
+ 1. OpenAI models (gpt-5-nano, gpt-4o-mini) via OpenAI API
7
+ 2. Qwen-VL models via synth-ai hosted inference
8
+
9
+ Traces are stored in SQLite with full multimodal messages (text + base64 images)
10
+ ready for export to SFT JSONL format.
11
+
12
+ Requirements:
13
+ - For OpenAI: OPENAI_API_KEY environment variable
14
+ - For synth-ai: SYNTH_API_KEY environment variable
15
+
16
+ Usage:
17
+ # Collect with gpt-5-nano
18
+ uv run python examples/qwen_vl/collect_vision_traces.py \
19
+ --model gpt-5-nano \
20
+ --provider openai \
21
+ --episodes 100 \
22
+ --max-steps 50 \
23
+ --output-dir traces/gpt5nano_vision
24
+
25
+ # Collect with Qwen3-VL via synth
26
+ uv run python examples/qwen_vl/collect_vision_traces.py \
27
+ --model Qwen/Qwen3-VL-8B-Instruct \
28
+ --provider synth \
29
+ --episodes 100 \
30
+ --max-steps 50 \
31
+ --output-dir traces/qwen3vl_vision
32
+ """
33
+
34
+ from __future__ import annotations
35
+
36
+ import argparse
37
+ import asyncio
38
+ import json
39
+ import logging
40
+ import os
41
+ from pathlib import Path
42
+ from typing import Any, cast
43
+ from uuid import uuid4
44
+
45
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.envs.crafter.environment import (
46
+ CrafterEnvironmentWrapper,
47
+ )
48
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.envs.crafter.policy import CrafterPolicy
49
+ from synth_ai.environments.examples.crafter_classic.environment import CrafterClassicEnvironment
50
+ from synth_ai.environments.examples.crafter_classic.taskset import (
51
+ CrafterTaskInstance,
52
+ CrafterTaskInstanceMetadata,
53
+ )
54
+ from synth_ai.environments.tasks.core import Impetus, Intent
55
+
56
+ # Try importing trace storage
57
+ try:
58
+ from synth_ai.tracing_v3.storage import create_storage
59
+ from synth_ai.tracing_v3.storage.config import StorageBackend, StorageConfig
60
+ TRACING_AVAILABLE = True
61
+ except ImportError:
62
+ print("Warning: Tracing storage not available. Traces will not be persisted.")
63
+ TRACING_AVAILABLE = False
64
+
65
+
66
+ def _get_openai_client():
67
+ """Get OpenAI client."""
68
+ from openai import OpenAI
69
+
70
+ api_key = os.getenv("OPENAI_API_KEY")
71
+ if not api_key:
72
+ raise RuntimeError("OPENAI_API_KEY not set")
73
+ return OpenAI(api_key=api_key)
74
+
75
+
76
+ def _default_backend_base_url() -> str:
77
+ raw = os.getenv("BACKEND_BASE_URL", "https://agent-learning.onrender.com/api").strip()
78
+ return raw if raw.endswith("/api") else f"{raw}/api"
79
+
80
+
81
+ def _get_synth_client():
82
+ """Get synth-ai inference client."""
83
+ from synth_ai.inference.client import InferenceClient
84
+
85
+ api_key = os.getenv("SYNTH_API_KEY")
86
+ if not api_key:
87
+ raise RuntimeError("SYNTH_API_KEY not set")
88
+ base_url = os.getenv("SYNTH_BASE_URL", _default_backend_base_url())
89
+ return InferenceClient(base_url=base_url, api_key=api_key)
90
+
91
+
92
def _build_task_instance(seed: int) -> CrafterTaskInstance:
    """Assemble the Crafter task instance used for one episode.

    Args:
        seed: World seed, stored both in the instance metadata and in the
            ``config`` attribute attached to the instance.

    Returns:
        A ``CrafterTaskInstance`` with a fresh UUID, a fixed instruction and
        rubric, "custom" difficulty metadata, and a ``config`` attribute of
        ``{"seed": seed, "length": 256, "area": [64, 64]}``.
    """
    task = CrafterTaskInstance(
        id=uuid4(),
        impetus=Impetus(instructions="Explore, survive, and unlock achievements."),
        intent=Intent(
            rubric={"goal": "Maximise Crafter achievements."},
            gold_trajectories=None,
            gold_state_diff={},
        ),
        metadata=CrafterTaskInstanceMetadata(
            difficulty="custom",
            seed=seed,
            num_trees_radius=0,
            num_cows_radius=0,
            num_hostiles_radius=0,
        ),
        is_reproducible=True,
        initial_engine_snapshot=None,
    )
    world_config = {"seed": seed, "length": 256, "area": [64, 64]}
    # Attached via setattr, as in the original; presumably ``config`` is not a
    # declared field on the task instance type — TODO confirm.
    setattr(task, "config", world_config)
    return task
117
+
118
+
119
+ def _normalise_openai_request(payload: dict[str, Any], model: str, temperature: float) -> dict[str, Any]:
120
+ """Normalize inference request for OpenAI API."""
121
+ request = dict(payload)
122
+ request["model"] = model
123
+
124
+ # Remove vendor-specific knobs
125
+ request.pop("stop_after_tool_calls", None)
126
+ request.pop("thinking_mode", None)
127
+ request.pop("thinking_budget", None)
128
+
129
+ # gpt-5 models have specific requirements
130
+ if "gpt-5" in model.lower():
131
+ # gpt-5-nano only supports temperature=1 (default)
132
+ request.pop("temperature", None) # Remove custom temperature
133
+ request.setdefault("max_completion_tokens", 512)
134
+ request.pop("max_tokens", None) # Remove if present
135
+ else:
136
+ # Older models use max_tokens and support custom temperature
137
+ request.setdefault("temperature", temperature)
138
+ max_completion = request.pop("max_completion_tokens", None)
139
+ if max_completion is not None:
140
+ request["max_tokens"] = max_completion
141
+ else:
142
+ request.setdefault("max_tokens", 512)
143
+
144
+ return request
145
+
146
+
147
async def collect_traces(
    model: str,
    provider: str,
    num_episodes: int,
    max_steps: int,
    seed_start: int,
    output_dir: Path,
    temperature: float,
):
    """Run Crafter episodes with a vision-capable model and persist each step as a trace.

    For every episode a fresh environment and policy are created. Each step
    asks the policy for an inference request, sends it to the selected
    provider, parses the tool calls from the response, stores the prompt plus
    assistant message in the tracing store, and applies the tool calls to the
    environment. An episode stops early when the observation is malformed,
    the policy yields no inference request, the model returns no tool calls,
    the environment returns a non-dict response, or the environment reports
    ``done``.

    Args:
        model: Model name passed to the policy and the inference provider.
        provider: ``"openai"`` or ``"synth"``.
        num_episodes: Number of episodes to run; episode ``i`` uses seed
            ``seed_start + i``.
        max_steps: Upper bound on steps per episode.
        seed_start: Seed of the first episode.
        output_dir: Directory that receives ``rollouts.db``; created if missing.
        temperature: Sampling temperature forwarded to the policy and provider.

    Returns:
        Path of the SQLite database file inside *output_dir*.

    Raises:
        RuntimeError: If the tracing storage modules could not be imported.
        ValueError: If *provider* is not a known provider.
    """
    # Setup tracing store
    if not TRACING_AVAILABLE:
        raise RuntimeError("Tracing storage not available. Cannot persist traces.")

    output_dir.mkdir(parents=True, exist_ok=True)
    db_path = output_dir / "rollouts.db"
    storage_config = StorageConfig(
        backend=StorageBackend.SQLITE,
        connection_string=f"sqlite+aiosqlite:///{db_path}",
    )
    tracing_store = create_storage(storage_config)
    await tracing_store.initialize()

    # Setup inference client
    if provider == "openai":
        client = _get_openai_client()
        inference_url = "openai://chat-completions"
    elif provider == "synth":
        client = _get_synth_client()
        inference_url = "synth://inference"
    else:
        raise ValueError(f"Unknown provider: {provider}")

    print(f"🎮 Collecting {num_episodes} episodes with {model}")
    print(f" Provider: {provider}")
    print(f" Max steps: {max_steps}")
    print(f" Output: {output_dir}")
    print(f" Database: {db_path}")
    print()

    total_steps = 0
    total_achievements = 0

    for episode_id in range(num_episodes):
        seed = seed_start + episode_id

        # Build task instance
        task_instance = _build_task_instance(seed)
        env = CrafterClassicEnvironment(task_instance)
        wrapper = CrafterEnvironmentWrapper(env, seed=seed)

        # Initialize policy (vision auto-detected from model name)
        policy = CrafterPolicy(inference_url=inference_url, model=model)
        await policy.initialize({
            "use_tools": True,
            "model": model,
            "temperature": temperature,
            "max_tokens": 512,
        })

        observation_packet = await wrapper.initialize()

        steps_taken = 0
        achievements = set()

        # try/finally guarantees the environment is terminated even when a
        # step raises (network error, malformed response, ...).
        try:
            # Run episode
            for step_idx in range(max_steps):
                obs_dict = observation_packet.get("observation")
                if not isinstance(obs_dict, dict):
                    break

                # Format observation
                obs_text = policy._format_observation_for_llm(observation_packet)  # noqa: SLF001

                # Only the metadata is needed here: the request it carries is
                # sent to the provider explicitly below.
                _, meta = await policy.step(
                    observation_text=obs_text,
                    metadata={"raw_observation": observation_packet},
                )
                if "inference_request" not in meta:
                    break

                inference_request = meta["inference_request"]

                # Call inference
                if provider == "openai":
                    normalized_request = _normalise_openai_request(
                        inference_request,
                        model=model,
                        temperature=temperature,
                    )
                    # NOTE: this SDK call is not awaited, so it blocks the
                    # event loop for the duration of the request.
                    response = client.chat.completions.create(**normalized_request)
                    response_dict = response.model_dump()
                else:  # synth
                    response_dict = await client.create_chat_completion(
                        model=model,
                        messages=inference_request["messages"],
                        temperature=temperature,
                        max_tokens=512,
                        tools=inference_request.get("tools"),
                    )

                # Parse tool calls
                assistant_tool_calls = CrafterPolicy.parse_response_to_tool_calls(
                    response_dict,
                    use_tools=policy.use_tools,
                )
                if not assistant_tool_calls:
                    break

                # Store trace: the prompt messages followed by the assistant reply.
                assistant_message = response_dict["choices"][0].get("message", {})
                trace_messages = inference_request["messages"] + [assistant_message]

                tracing_store_any = cast(Any, tracing_store)
                if hasattr(tracing_store_any, "store_trace"):
                    await tracing_store_any.store_trace(
                        session_id=f"ep{episode_id:04d}",
                        step=step_idx,
                        messages=trace_messages,
                        model=model,
                        metadata={
                            "seed": seed,
                            "has_image": policy.use_vision,
                            "provider": provider,
                        },
                    )
                else:
                    logging.warning(
                        "Tracing backend does not expose store_trace(); skipping persistence for episode %s",
                        episode_id,
                    )

                # Execute action
                assistant_text = assistant_message.get("content")
                env_response = await wrapper.step(assistant_tool_calls)
                if not isinstance(env_response, dict):
                    break

                # Update policy history
                policy._append_assistant_turn(  # noqa: SLF001
                    assistant_text,
                    assistant_tool_calls,
                    env_response,
                )

                steps_taken += 1

                # Track achievements; tolerate a missing or None observation
                # or status mapping instead of crashing on ``.get``/``.items``.
                obs = env_response.get("observation") or {}
                ach_status = obs.get("achievements_status") or {}
                achievements.update(name for name, unlocked in ach_status.items() if unlocked)

                if env_response.get("done"):
                    break
                observation_packet = env_response
        finally:
            await wrapper.terminate()

        total_steps += steps_taken
        total_achievements += len(achievements)

        print(
            f"✓ Episode {episode_id:3d} (seed={seed}): {steps_taken} steps, "
            f"{len(achievements)} achievements"
        )

    # Guard the average so ``--episodes 0`` reports 0.00 instead of raising
    # ZeroDivisionError.
    avg_achievements = total_achievements / num_episodes if num_episodes > 0 else 0.0

    print()
    print("✅ Collection complete!")
    print(f" Total episodes: {num_episodes}")
    print(f" Total steps: {total_steps}")
    print(f" Avg achievements: {avg_achievements:.2f}")
    print(f" Database: {db_path}")
    print()
    print("Next steps:")
    print(" 1. Export traces to SFT JSONL format")
    print(" 2. Split into train/val datasets")
    print(" 3. Train VLM with LoRA")

    return db_path
329
+
330
+
331
async def main() -> None:
    """Parse the command-line options and run one trace-collection job."""
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        "--model",
        help="Model name (e.g., gpt-5-nano, Qwen/Qwen3-VL-8B-Instruct)",
        required=True,
    )
    cli.add_argument(
        "--provider",
        help="Inference provider",
        required=True,
        choices=["openai", "synth"],
    )
    # The remaining options differ only in flag, type, default and help text.
    simple_options = (
        ("--episodes", int, 100, "Number of episodes"),
        ("--max-steps", int, 50, "Max steps per episode"),
        ("--seed-start", int, 0, "Starting seed"),
        ("--temperature", float, 0.7, "Sampling temperature"),
        ("--output-dir", Path, Path("traces/vision_traces"), "Output directory for traces"),
    )
    for flag, kind, fallback, description in simple_options:
        cli.add_argument(flag, type=kind, default=fallback, help=description)

    opts = cli.parse_args()

    await collect_traces(
        model=opts.model,
        provider=opts.provider,
        num_episodes=opts.episodes,
        max_steps=opts.max_steps,
        seed_start=opts.seed_start,
        output_dir=opts.output_dir,
        temperature=opts.temperature,
    )


if __name__ == "__main__":
    asyncio.run(main())
@@ -0,0 +1,110 @@
1
+ [algorithm]
2
+ type = "online"
3
+ method = "policy_gradient"
4
+ variety = "gspo"
5
+
6
+ [services]
7
+ task_url = "https://YOUR-MODAL-TASK-APP.modal.run"
8
+
9
+ [compute]
10
+ gpu_type = "H200"
11
+ gpu_count = 2
12
+
13
+ [topology]
14
+ type = "single_node_split"
15
+ gpus_for_vllm = 1
16
+ gpus_for_training = 1
17
+ gpus_for_ref = 0
18
+ tensor_parallel = 1
19
+
20
+ [vllm]
21
+ tensor_parallel_size = 1
22
+ max_model_len = 4096
23
+
24
+ [reference]
25
+ placement = "none"
26
+
27
+ [model]
28
+ base = "Qwen/Qwen3-VL-4B-Instruct"
29
+ trainer_mode = "lora"
30
+ label = "crafter-rl-vision-qwen3vl4b"
31
+ supports_vision = true
32
+
33
+ [lora]
34
+ r = 16
35
+ alpha = 32
36
+ dropout = 0.05
37
+ target_modules = [ "all-linear",]
38
+
39
+ [rollout]
40
+ env_name = "crafter"
41
+ max_turns = 10
42
+ episodes_per_batch = 2
43
+ policy_name = "crafter-react"
44
+ max_concurrent_rollouts = 4
45
+ batches_per_step = 2
46
+ ops = [ "agent", "env",]
47
+
48
+ [evaluation]
49
+ instances = 8
50
+ every_n_iters = 5
51
+ seeds = [ 0, 1, 2, 3, 4, 5, 6, 7,]
52
+
53
+ [training]
54
+ num_epochs = 1
55
+ iterations_per_epoch = 3
56
+ gradient_accumulation_steps = 2
57
+ max_accumulated_minibatch = 1
58
+ max_turns = 10
59
+ batch_size = 2
60
+ group_size = 2
61
+ learning_rate = 5e-5
62
+ log_interval = 1
63
+ weight_sync_interval = 1
64
+ event_rewards_kind = "unique"
65
+ async_semaphore_max = 2
66
+ step_rewards_enabled = true
67
+ step_rewards_mode = "decision_stepwise"
68
+ step_rewards_indicator_lambda = 1.0
69
+ step_rewards_beta = 0.0
70
+ step_rewards_strategy = "consistent"
71
+ max_images_per_message = 1
72
+ supports_vision = true
73
+
74
+ [tags]
75
+ experiment = "crafter_rl_vision_qwen3vl4b"
76
+ task = "crafter_agent_vision"
77
+ model_size = "4b"
78
+ vision_enabled = true
79
+ image_only = true
80
+
81
+ [vllm.limit_mm_per_prompt]
82
+ image = 1
83
+
84
+ [rollout.env_config]
85
+ difficulty = "easy"
86
+
87
+ [rollout.policy_config]
88
+ use_vision = true
89
+ image_only_mode = true
90
+ temperature = 0.6
91
+ top_p = 0.95
92
+ max_tokens = 512
93
+ max_llm_calls = 10
94
+
95
+ [training.weight_sync]
96
+ enable = true
97
+ targets = [ "policy",]
98
+ mode = "direct"
99
+ direct = true
100
+ verify_every_k = 0
101
+
102
+ [judge.options]
103
+ timeout_s = 30
104
+
105
+ [rollout.env_config.step_rewards]
106
+ enabled = true
107
+ mode = "decision_stepwise"
108
+ strategy = "consistent"
109
+ indicator_lambda = 1.0
110
+ step_beta = 0.0
@@ -0,0 +1,59 @@
1
+ # Example Vision SFT Config for Crafter
2
+ # Train Qwen-VL on collected vision traces
3
+
4
+ [algorithm]
5
+ type = "offline"
6
+ method = "sft"
7
+ variety = "lora"
8
+
9
+ [job]
10
+ model = "Qwen/Qwen3-VL-8B-Instruct" # or Qwen/Qwen3-VL-4B-Instruct
11
+ # Dataset from collect_vision_traces.py → export_to_sft.py
12
+ data = "traces/gpt5nano_vision/train.jsonl"
13
+
14
+ [compute]
15
+ gpu_type = "H200"
16
+ gpu_count = 2 # 2x H200 (282GB total)
17
+ nodes = 1
18
+
19
+ [training]
20
+ mode = "lora" # SFT with LoRA
21
+ use_qlora = true # Quantized LoRA for memory efficiency
22
+
23
+ [hyperparameters]
24
+ n_epochs = 2 # 2 epochs over collected samples
25
+ per_device_batch = 1 # Batch size 1 (images are memory-intensive)
26
+ gradient_accumulation_steps = 32
27
+ sequence_length = 2048 # Shorter context (images dominate memory)
28
+ learning_rate = 5e-06
29
+ warmup_ratio = 0.03
30
+ train_kind = "peft"
31
+
32
+ # LoRA config
33
+ lora_rank = 16
34
+ lora_alpha = 32
35
+ lora_dropout = 0.05
36
+ lora_target_modules = ["all-linear"] # Full linear layer adaptation
37
+
38
+ # Training optimizations
39
+ [hyperparameters.parallelism]
40
+ use_deepspeed = true
41
+ deepspeed_stage = 2
42
+ fsdp = false
43
+ bf16 = true
44
+ fp16 = false
45
+ activation_checkpointing = true
46
+
47
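+ # Evaluation (NOTE: these keys follow the [hyperparameters.parallelism] header, so TOML assigns them to that table; move them above it if they are meant for [hyperparameters])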
48
+ evaluation_strategy = "steps"
49
+ eval_steps = 100
50
+ save_best_model_at_end = true
51
+ metric_for_best_model = "val.loss"
52
+ greater_is_better = false
53
+ load_best_model_at_end = true
54
+
55
+ [tags]
56
+ task = "crafter"
57
+ modality = "vision"
58
+ data_source = "collected_traces"
59
+ model_family = "qwen_vl"
@@ -0,0 +1,26 @@
1
+ # Evaluation config for gpt-4o-mini with vision
2
+ # Higher-quality teacher for Crafter SFT distillation
3
+
4
+ [eval]
5
+ app_id = "grpo-crafter-task-app"
6
+ task_app_url = "https://synth-laboratories--grpo-crafter-task-app.modal.run"
7
+ model = "gpt-4o-mini-2024-07-18"
8
+ seeds = "200-299"
9
+ max_turns = 50
10
+ concurrency = 5
11
+ env_name = "crafter"
12
+ policy_name = "crafter-react"
13
+ trace_format = "structured"
14
+ return_trace = true
15
+
16
+ [eval.env_config]
17
+ env_params = {max_steps_per_episode = 50}
18
+
19
+ [eval.policy_config]
20
+ provider = "openai"
21
+ model = "gpt-4o-mini-2024-07-18"
22
+ temperature = 0.6
23
+ max_tokens = 512
24
+ use_vision = true
25
+ image_only_mode = false
26
+ use_tools = true
@@ -0,0 +1,29 @@
1
+ # Proper synth-ai eval config for Crafter with gpt-4o-mini vision
2
+ # Collects traces (including images) into a database for use with synth-ai filter
3
+
4
+ [eval]
5
+ app_id = "grpo-crafter-task-app" # Modal deployed task app
6
+ model = "gpt-4o-mini-2024-07-18"
7
+ seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] # 10 episodes for test
8
+ max_turns = 50 # 50 steps per episode
9
+ concurrency = 2 # 2 parallel episodes
10
+ env_name = "crafter"
11
+ policy_name = "crafter-react"
12
+ trace_format = "structured" # Required for synth-ai eval
13
+ return_trace = true
14
+
15
+ [eval.env_config]
16
+ env_params = {max_steps_per_episode = 50}
17
+
18
+ [eval.policy_config]
19
+ provider = "openai"
20
+ model = "gpt-4o-mini-2024-07-18"
21
+ inference_url = "https://api.openai.com" # Base URL
22
+ # Note: Don't set temperature for gpt-4o-mini; use the default
23
+ top_p = 0.95
24
+ max_tokens = 512
25
+ use_vision = true # Enable vision
26
+ image_only_mode = false # Use both text + images
27
+ max_llm_calls = 50
28
+ use_tools = true # Enable tool calling
29
+
@@ -0,0 +1,26 @@
1
+ # Evaluation config for gpt-4o-mini (vision)
2
+ # Collects traces for SFT training; legacy gpt-5-nano naming kept for convenience
3
+
4
+ [eval]
5
+ app_id = "grpo-crafter-task-app"
6
+ task_app_url = "https://synth-laboratories--grpo-crafter-task-app.modal.run"
7
+ model = "gpt-4o-mini-2024-07-18"
8
+ seeds = "0-99"
9
+ max_turns = 50
10
+ concurrency = 5
11
+ env_name = "crafter"
12
+ policy_name = "crafter-react"
13
+ trace_format = "structured"
14
+ return_trace = true
15
+
16
+ [eval.env_config]
17
+ env_params = {max_steps_per_episode = 50}
18
+
19
+ [eval.policy_config]
20
+ provider = "openai"
21
+ model = "gpt-4o-mini-2024-07-18"
22
+ temperature = 0.7
23
+ max_tokens = 512
24
+ use_vision = true
25
+ image_only_mode = false
26
+ use_tools = true
@@ -0,0 +1,26 @@
1
+ # Evaluation config for Qwen3-VL vision rollouts
2
+ # Collects traces for SFT training via synth-ai hosted inference
3
+
4
+ [eval]
5
+ app_id = "grpo-crafter-task-app"
6
+ task_app_url = "https://synth-laboratories--grpo-crafter-task-app.modal.run"
7
+ model = "Qwen/Qwen3-VL-8B-Instruct"
8
+ seeds = "100-199"
9
+ max_turns = 50
10
+ concurrency = 5
11
+ env_name = "crafter"
12
+ policy_name = "crafter-react"
13
+ trace_format = "structured"
14
+ return_trace = true
15
+
16
+ [eval.env_config]
17
+ env_params = {max_steps_per_episode = 50}
18
+
19
+ [eval.policy_config]
20
+ provider = "synth"
21
+ model = "Qwen/Qwen3-VL-8B-Instruct"
22
+ temperature = 0.7
23
+ max_tokens = 512
24
+ use_vision = true
25
+ image_only_mode = false
26
+ use_tools = true
@@ -0,0 +1,49 @@
1
+ # Filter Qwen3-VL vision traces for SFT training
2
+ # Mirrors the GPT-4o mini filter configuration for vision data
3
+
4
+ [filter]
5
+ input_db = "traces/qwen3vl_vision/rollouts.db"
6
+ output_dir = "traces/qwen3vl_vision/sft"
7
+
8
+ # Quality filters
9
+ min_steps_per_episode = 5
10
+ min_achievements_per_episode = 0
11
+ max_steps_per_episode = 50
12
+
13
+ # Behavioral filters
14
+ detect_loops = true
15
+ max_repeated_actions = 5
16
+ min_unique_states = 3
17
+
18
+ # Remove episodes with errors
19
+ filter_errors = true
20
+ filter_timeouts = true
21
+
22
+ # Export format
23
+ export_format = "sft_jsonl"
24
+ include_images = true
25
+ include_metadata = true
26
+
27
+ # SFT-specific processing
28
+ [sft]
29
+ max_sequence_length = 2048
30
+ deduplicate = true
31
+ shuffle = true
32
+ require_valid_tool_calls = true
33
+ filter_empty_responses = true
34
+
35
+ # Train/val split
36
+ [split]
37
+ enabled = true
38
+ val_fraction = 0.1
39
+ random_seed = 42
40
+ stratify_by = "achievements"
41
+
42
+ train_file = "train.jsonl"
43
+ val_file = "val.jsonl"
44
+
45
+ # Statistics
46
+ [output]
47
+ save_stats = true
48
+ stats_file = "filter_stats.json"
49
+ save_filtered_episode_ids = true