synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of synth-ai might be problematic.

Files changed (354)
  1. examples/README.md +1 -0
  2. examples/analyze_semantic_words.sh +2 -2
  3. examples/blog_posts/pokemon_vl/README.md +98 -0
  4. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
  5. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  6. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  7. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
  8. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  9. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  10. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  11. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  12. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  13. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  14. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
  15. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  16. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  17. examples/multi_step/SFT_README.md +147 -0
  18. examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
  19. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
  20. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
  21. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
  22. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  23. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  24. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  25. examples/multi_step/convert_traces_to_sft.py +84 -0
  26. examples/multi_step/run_sft_qwen30b.sh +45 -0
  27. examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
  28. examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
  29. examples/qwen_coder/configs/coder_lora_small.toml +1 -2
  30. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  31. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  32. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  33. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  34. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  35. examples/qwen_vl/QUICKSTART.md +327 -0
  36. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  37. examples/qwen_vl/README.md +152 -0
  38. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  39. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  40. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  41. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  42. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  43. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  44. examples/qwen_vl/__init__.py +2 -0
  45. examples/qwen_vl/collect_data_via_cli.md +415 -0
  46. examples/qwen_vl/collect_vision_traces.py +368 -0
  47. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  48. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  49. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  50. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  51. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  52. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  53. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  54. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  55. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  56. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  57. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  58. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  59. examples/qwen_vl/run_vision_comparison.sh +61 -0
  60. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  61. examples/qwen_vl/test_image_validation.py +201 -0
  62. examples/qwen_vl/test_sft_vision_data.py +110 -0
  63. examples/rl/README.md +6 -6
  64. examples/rl/configs/eval_base_qwen.toml +17 -0
  65. examples/rl/configs/eval_rl_qwen.toml +13 -0
  66. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  67. examples/rl/configs/rl_from_base_qwen17.toml +79 -0
  68. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  69. examples/rl/run_eval.py +436 -0
  70. examples/rl/run_rl_and_save.py +111 -0
  71. examples/rl/task_app/README.md +21 -0
  72. examples/rl/task_app/math_single_step.py +990 -0
  73. examples/rl/task_app/math_task_app.py +111 -0
  74. examples/run_crafter_demo.sh +2 -2
  75. examples/sft/README.md +6 -6
  76. examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
  77. examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
  78. examples/sft/evaluate.py +2 -4
  79. examples/sft/export_dataset.py +7 -4
  80. examples/swe/task_app/README.md +33 -3
  81. examples/swe/task_app/grpo_swe_mini.py +4 -1
  82. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  83. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  84. examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
  85. examples/swe/task_app/hosted/inference/openai_client.py +4 -4
  86. examples/swe/task_app/hosted/policy_routes.py +0 -2
  87. examples/swe/task_app/hosted/rollout.py +0 -8
  88. examples/swe/task_app/morph_backend.py +178 -0
  89. examples/task_apps/crafter/task_app/README.md +1 -1
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
  91. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
  97. examples/task_apps/enron/__init__.py +1 -0
  98. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  99. examples/task_apps/math/README.md +1 -2
  100. examples/task_apps/pokemon_red/README.md +3 -4
  101. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  102. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  103. examples/task_apps/pokemon_red/task_app.py +36 -5
  104. examples/task_apps/sokoban/README.md +2 -3
  105. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  106. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  107. examples/vlm/README.md +3 -3
  108. examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
  109. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  110. examples/vlm/filter_image_rows.py +1 -1
  111. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  112. examples/warming_up_to_rl/_utils.py +92 -0
  113. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  114. examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
  115. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  116. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  117. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  118. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  119. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  120. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  121. examples/warming_up_to_rl/readme.md +63 -132
  122. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  123. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  124. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  125. examples/warming_up_to_rl/task_app/README.md +42 -0
  126. examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
  127. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  128. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  129. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  130. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  131. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  132. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  133. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  134. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  135. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  136. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  137. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  138. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  139. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  147. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  148. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  152. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  153. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
  154. synth_ai/__init__.py +44 -30
  155. synth_ai/_utils/__init__.py +47 -0
  156. synth_ai/_utils/base_url.py +10 -0
  157. synth_ai/_utils/http.py +10 -0
  158. synth_ai/_utils/prompts.py +10 -0
  159. synth_ai/_utils/task_app_state.py +12 -0
  160. synth_ai/_utils/user_config.py +10 -0
  161. synth_ai/api/models/supported.py +144 -7
  162. synth_ai/api/train/__init__.py +13 -1
  163. synth_ai/api/train/builders.py +9 -3
  164. synth_ai/api/train/cli.py +155 -17
  165. synth_ai/api/train/config_finder.py +18 -11
  166. synth_ai/api/train/configs/__init__.py +8 -1
  167. synth_ai/api/train/configs/rl.py +32 -7
  168. synth_ai/api/train/configs/sft.py +6 -2
  169. synth_ai/api/train/configs/shared.py +59 -2
  170. synth_ai/api/train/env_resolver.py +13 -10
  171. synth_ai/auth/credentials.py +119 -0
  172. synth_ai/cli/__init__.py +61 -69
  173. synth_ai/cli/_modal_wrapper.py +7 -5
  174. synth_ai/cli/_typer_patch.py +0 -2
  175. synth_ai/cli/_validate_task_app.py +22 -4
  176. synth_ai/cli/commands/__init__.py +17 -0
  177. synth_ai/cli/commands/demo/__init__.py +6 -0
  178. synth_ai/cli/commands/demo/core.py +163 -0
  179. synth_ai/cli/commands/deploy/__init__.py +23 -0
  180. synth_ai/cli/commands/deploy/core.py +614 -0
  181. synth_ai/cli/commands/deploy/errors.py +72 -0
  182. synth_ai/cli/commands/deploy/validation.py +11 -0
  183. synth_ai/cli/commands/eval/__init__.py +19 -0
  184. synth_ai/cli/commands/eval/core.py +1109 -0
  185. synth_ai/cli/commands/eval/errors.py +81 -0
  186. synth_ai/cli/commands/eval/validation.py +133 -0
  187. synth_ai/cli/commands/filter/__init__.py +12 -0
  188. synth_ai/cli/commands/filter/core.py +388 -0
  189. synth_ai/cli/commands/filter/errors.py +55 -0
  190. synth_ai/cli/commands/filter/validation.py +77 -0
  191. synth_ai/cli/commands/help/__init__.py +177 -0
  192. synth_ai/cli/commands/help/core.py +73 -0
  193. synth_ai/cli/commands/status/__init__.py +64 -0
  194. synth_ai/cli/commands/status/client.py +192 -0
  195. synth_ai/cli/commands/status/config.py +92 -0
  196. synth_ai/cli/commands/status/errors.py +20 -0
  197. synth_ai/cli/commands/status/formatters.py +164 -0
  198. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  199. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  200. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  201. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  202. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  203. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  204. synth_ai/cli/commands/status/utils.py +114 -0
  205. synth_ai/cli/commands/train/__init__.py +53 -0
  206. synth_ai/cli/commands/train/core.py +21 -0
  207. synth_ai/cli/commands/train/errors.py +117 -0
  208. synth_ai/cli/commands/train/judge_schemas.py +199 -0
  209. synth_ai/cli/commands/train/judge_validation.py +304 -0
  210. synth_ai/cli/commands/train/validation.py +443 -0
  211. synth_ai/cli/demo.py +2 -162
  212. synth_ai/cli/deploy/__init__.py +28 -0
  213. synth_ai/cli/deploy/core.py +5 -0
  214. synth_ai/cli/deploy/errors.py +23 -0
  215. synth_ai/cli/deploy/validation.py +5 -0
  216. synth_ai/cli/eval/__init__.py +36 -0
  217. synth_ai/cli/eval/core.py +5 -0
  218. synth_ai/cli/eval/errors.py +31 -0
  219. synth_ai/cli/eval/validation.py +5 -0
  220. synth_ai/cli/filter/__init__.py +28 -0
  221. synth_ai/cli/filter/core.py +5 -0
  222. synth_ai/cli/filter/errors.py +23 -0
  223. synth_ai/cli/filter/validation.py +5 -0
  224. synth_ai/cli/legacy_root_backup.py +3 -1
  225. synth_ai/cli/lib/__init__.py +10 -0
  226. synth_ai/cli/lib/task_app_discovery.py +7 -0
  227. synth_ai/cli/lib/task_app_env.py +518 -0
  228. synth_ai/cli/modal_serve/__init__.py +12 -0
  229. synth_ai/cli/modal_serve/core.py +14 -0
  230. synth_ai/cli/modal_serve/errors.py +8 -0
  231. synth_ai/cli/modal_serve/validation.py +11 -0
  232. synth_ai/cli/recent.py +2 -1
  233. synth_ai/cli/serve/__init__.py +12 -0
  234. synth_ai/cli/serve/core.py +14 -0
  235. synth_ai/cli/serve/errors.py +8 -0
  236. synth_ai/cli/serve/validation.py +11 -0
  237. synth_ai/cli/setup.py +21 -0
  238. synth_ai/cli/status.py +7 -126
  239. synth_ai/cli/task_app_deploy.py +7 -0
  240. synth_ai/cli/task_app_list.py +25 -0
  241. synth_ai/cli/task_app_modal_serve.py +11 -0
  242. synth_ai/cli/task_app_serve.py +11 -0
  243. synth_ai/cli/task_apps.py +110 -1499
  244. synth_ai/cli/traces.py +1 -1
  245. synth_ai/cli/train/__init__.py +12 -0
  246. synth_ai/cli/train/core.py +21 -0
  247. synth_ai/cli/train/errors.py +8 -0
  248. synth_ai/cli/train/validation.py +24 -0
  249. synth_ai/cli/train.py +5 -0
  250. synth_ai/cli/turso.py +1 -1
  251. synth_ai/cli/watch.py +1 -1
  252. synth_ai/demos/__init__.py +10 -0
  253. synth_ai/demos/core/__init__.py +28 -1
  254. synth_ai/demos/crafter/__init__.py +1 -0
  255. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  256. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  257. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  258. synth_ai/demos/demo_registry.py +176 -0
  259. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  260. synth_ai/demos/math/__init__.py +1 -0
  261. synth_ai/demos/math/_common.py +16 -0
  262. synth_ai/demos/math/app.py +38 -0
  263. synth_ai/demos/math/config.toml +76 -0
  264. synth_ai/demos/math/deploy_modal.py +54 -0
  265. synth_ai/demos/math/modal_task_app.py +702 -0
  266. synth_ai/demos/math/task_app_entry.py +51 -0
  267. synth_ai/environments/environment/core.py +7 -1
  268. synth_ai/environments/examples/bandit/engine.py +0 -1
  269. synth_ai/environments/examples/bandit/environment.py +0 -1
  270. synth_ai/environments/examples/red/engine.py +33 -12
  271. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  272. synth_ai/environments/examples/red/environment.py +26 -0
  273. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  274. synth_ai/environments/examples/wordle/environment.py +0 -1
  275. synth_ai/evals/base.py +16 -5
  276. synth_ai/evals/client.py +1 -1
  277. synth_ai/http.py +8 -22
  278. synth_ai/inference/client.py +1 -1
  279. synth_ai/judge_schemas.py +4 -5
  280. synth_ai/learning/client.py +1 -1
  281. synth_ai/learning/health.py +1 -1
  282. synth_ai/learning/jobs.py +1 -1
  283. synth_ai/learning/rl/client.py +4 -2
  284. synth_ai/learning/rl/env_keys.py +1 -1
  285. synth_ai/learning/rl/secrets.py +1 -1
  286. synth_ai/learning/sft/client.py +1 -1
  287. synth_ai/learning/sft/data.py +407 -4
  288. synth_ai/learning/validators.py +4 -1
  289. synth_ai/streaming/__init__.py +29 -0
  290. synth_ai/streaming/config.py +94 -0
  291. synth_ai/streaming/handlers.py +469 -0
  292. synth_ai/streaming/streamer.py +301 -0
  293. synth_ai/streaming/types.py +95 -0
  294. synth_ai/task/apps/__init__.py +4 -2
  295. synth_ai/task/config.py +6 -4
  296. synth_ai/task/rubrics/__init__.py +1 -2
  297. synth_ai/task/rubrics/loaders.py +14 -10
  298. synth_ai/task/rubrics.py +219 -0
  299. synth_ai/task/trace_correlation_helpers.py +24 -11
  300. synth_ai/task/tracing_utils.py +14 -3
  301. synth_ai/task/validators.py +0 -1
  302. synth_ai/tracing_v3/abstractions.py +3 -3
  303. synth_ai/tracing_v3/config.py +15 -13
  304. synth_ai/tracing_v3/constants.py +21 -0
  305. synth_ai/tracing_v3/db_config.py +3 -1
  306. synth_ai/tracing_v3/decorators.py +10 -7
  307. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  308. synth_ai/tracing_v3/migration_helper.py +1 -2
  309. synth_ai/tracing_v3/session_tracer.py +7 -7
  310. synth_ai/tracing_v3/storage/base.py +29 -29
  311. synth_ai/tracing_v3/storage/config.py +3 -3
  312. synth_ai/tracing_v3/turso/daemon.py +8 -9
  313. synth_ai/tracing_v3/turso/native_manager.py +80 -72
  314. synth_ai/tracing_v3/utils.py +2 -2
  315. synth_ai/utils/__init__.py +101 -0
  316. synth_ai/utils/base_url.py +94 -0
  317. synth_ai/utils/cli.py +131 -0
  318. synth_ai/utils/env.py +294 -0
  319. synth_ai/utils/http.py +172 -0
  320. synth_ai/utils/modal.py +308 -0
  321. synth_ai/utils/process.py +212 -0
  322. synth_ai/utils/prompts.py +39 -0
  323. synth_ai/utils/sqld.py +122 -0
  324. synth_ai/utils/task_app_discovery.py +882 -0
  325. synth_ai/utils/task_app_env.py +186 -0
  326. synth_ai/utils/task_app_state.py +318 -0
  327. synth_ai/utils/user_config.py +137 -0
  328. synth_ai/v0/config/__init__.py +1 -5
  329. synth_ai/v0/config/base_url.py +1 -7
  330. synth_ai/v0/tracing/config.py +1 -1
  331. synth_ai/v0/tracing/decorators.py +1 -1
  332. synth_ai/v0/tracing/upload.py +1 -1
  333. synth_ai/v0/tracing_v1/config.py +1 -1
  334. synth_ai/v0/tracing_v1/decorators.py +1 -1
  335. synth_ai/v0/tracing_v1/upload.py +1 -1
  336. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
  337. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
  338. synth_ai/cli/man.py +0 -106
  339. synth_ai/cli/tui.py +0 -57
  340. synth_ai/compound/cais.py +0 -0
  341. synth_ai/core/experiment.py +0 -13
  342. synth_ai/core/system.py +0 -15
  343. synth_ai/demo_registry.py +0 -295
  344. synth_ai/handshake.py +0 -109
  345. synth_ai/tui/__init__.py +0 -5
  346. synth_ai/tui/__main__.py +0 -13
  347. synth_ai/tui/cli/__init__.py +0 -1
  348. synth_ai/tui/cli/query_experiments.py +0 -164
  349. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  350. synth_ai/tui/dashboard.py +0 -906
  351. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
  352. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
  353. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
  354. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
examples/README.md ADDED
@@ -0,0 +1 @@
+ ### The instructions for how to create and configure a task app are documented at https://docs.usesynth.ai/sdk/task-apps
examples/analyze_semantic_words.sh CHANGED
@@ -5,7 +5,7 @@
  # Output: Markdown tables and JSON data (no plotting dependencies)

  echo "🔍 Analyzing semantic map words from Crafter agent..."
- echo "Make sure the synth-ai service is running: uvx synth-ai serve"
+ echo "Make sure the synth-ai service is running: uvx synth-ai deploy --runtime uvicorn"
  echo ""

  cd synth_ai/environments/examples/crafter_classic/agent_demos/
@@ -14,4 +14,4 @@ cd synth_ai/environments/examples/crafter_classic/agent_demos/
  python analyze_semantic_words_markdown.py --model gemini-1.5-flash --episodes 3 --max-turns 30

  echo ""
- echo "✅ Analysis complete! Check the generated markdown report and JSON files."
+ echo "✅ Analysis complete! Check the generated markdown report and JSON files."
examples/blog_posts/pokemon_vl/README.md ADDED
@@ -0,0 +1,98 @@
+ # Pokémon VL: Vision-Language RL Pipeline
+
+ This playbook demonstrates end-to-end vision-language reinforcement learning on Pokémon Red using Synth AI's CLI tools. We follow the eval → collect data → SFT → RL → eval pipeline, but with vision models throughout.
+
+ ## Overview
+
+ **Model**: Qwen3-VL-4B-Instruct (4B parameter vision-language model via Synth API)
+ **Environment**: Pokémon Red (Game Boy emulation with vision support)
+ **Benchmark**: Pallet Town progression task (leave bedroom → get starter → win first battle)
+
+ ## Pipeline Steps
+
+ 1. **Deploy Task App** - Host the Pokémon Red environment
+ 2. **Collect Vision Rollouts** - Generate high-quality demonstrations using Qwen3-VL
+ 3. **Filter Dataset** - Extract successful trajectories for supervised fine-tuning
+ 4. **Fine-Tune Qwen3-4B VL** - Train vision-language model on filtered data
+ 5. **Vision-Language RL** - Bootstrap RL training from SFT checkpoint
+ 6. **Final Evaluation** - Compare SFT and RL performance
+
+ ## Prerequisites
+
+ ```bash
+ # Install dependencies
+ uv pip install -e .
+
+ # Setup authentication
+ uvx synth-ai setup
+
+ # Copy environment template
+ cp examples/blog_posts/pokemon_vl/.env.example .env
+ ```
+
+ ## Quick Start
+
+ ```bash
+ # Export trace database path
+ export POKEMON_VL_TRACE_DB=traces/v3/pokemon_vl_blog.db
+
+ # 1. Deploy task app
+ uvx synth-ai deploy pokemon_red --runtime modal --name pokemon-vl-blog --env-file .env
+
+ # 2. Collect vision rollouts with Qwen3-VL
+ uvx synth-ai eval pokemon_red --config examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml --trace-db "${POKEMON_VL_TRACE_DB}"
+
+ # 3. Filter high-reward trajectories
+ uvx synth-ai filter --config examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml
+
+ # 4. Fine-tune Qwen3-4B VL
+ uvx synth-ai train --type sft --config examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml --env-file .env --poll
+
+ # 5. RL from SFT checkpoint (replace JOB_ID)
+ uvx synth-ai train --type rl --config examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml --env-file .env --poll
+
+ # 6. Evaluate final RL model
+ uvx synth-ai eval pokemon_red --config examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml --trace-db "${POKEMON_VL_TRACE_DB}"
+ ```
+
+ ## Vision Features
+
+ - **Full Game Boy Frames**: Base64-encoded PNG screenshots (160x144 resolution)
+ - **Vision-Only Mode**: Pure image understanding without text state
+ - **Vision + Text Mode**: Combined visual and structured state information
+ - **Efficient Action Batching**: `execute_sequence` tool for 5-10 actions per inference call
+
+ ## Expected Results
+
+ | Stage | Model | Mean Reward | Success Rate | Best Achievement |
+ |-------|-------|-------------|--------------|------------------|
+ | Initial | Qwen3-VL (vision) | ~150 | 60% | Win first battle |
+ | SFT | Qwen3-4B VL | ~200 | 75% | Win first battle + explore |
+ | RL | Qwen3-4B VL + RL | ~350 | 85% | Complete Pallet Town |
+
+ ## Files
+
+ - `configs/` - All TOML configuration files
+ - `ft_data/` - Filtered datasets for fine-tuning
+ - `.env.example` - Environment variables template
+
+ ## Vision Model Configuration
+
+ The vision models receive:
+ - **Input**: Game Boy screenshot + optional structured state (position, HP, party, etc.)
+ - **Output**: Sequence of button presses via `execute_sequence` tool
+ - **Action Space**: UP, DOWN, LEFT, RIGHT, A, B, START, SELECT with frame counts
+
+ ## Reward Function
+
+ Dense rewards for Pallet Town progression:
+ - Leave bedroom (+20)
+ - Exit house (+30)
+ - Find Oak's lab (+40)
+ - Talk to Oak (+50)
+ - Get starter Pokémon (+100)
+ - Enter battle (+75)
+ - Deal damage (+50 per 10HP)
+ - Win battle (+150)
+
+ Total possible: ~700 points
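
A quick arithmetic check on the reward table above: the fixed milestones alone sum to 465, so the quoted ~700-point ceiling relies on the per-10-HP damage bonus. A minimal Python sketch follows; the milestone keys and helper name are hypothetical, not code from the package.

```python
# Hypothetical sketch: tally the fixed Pallet Town milestones listed above and
# show how the +50-per-10HP damage bonus closes the gap to ~700 points.
MILESTONES = {
    "leave_bedroom": 20,
    "exit_house": 30,
    "find_oaks_lab": 40,
    "talk_to_oak": 50,
    "get_starter": 100,
    "enter_battle": 75,
    "win_battle": 150,
}
DAMAGE_BONUS_PER_10HP = 50


def episode_reward(milestones_hit: set[str], hp_damage_dealt: int) -> int:
    """Sum the milestone rewards plus +50 for every full 10 HP of damage dealt."""
    fixed = sum(v for k, v in MILESTONES.items() if k in milestones_hit)
    return fixed + DAMAGE_BONUS_PER_10HP * (hp_damage_dealt // 10)


print(sum(MILESTONES.values()))  # 465; roughly five damage bonuses reach ~700
print(episode_reward(set(MILESTONES), hp_damage_dealt=50))  # 715
```
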
examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml ADDED
@@ -0,0 +1,25 @@
+ [eval]
+ app_id = "pokemon_red"
+ task_app_url = "https://synth-laboratories--pokemon-vl-qwen-xml-fastapi-app.modal.run"
+ model = "Qwen/Qwen3-VL-8B-Instruct" # Vision-capable Qwen3-VL model
+ seeds = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+ max_turns = 10
+ concurrency = 2
+ env_name = "pokemon_red"
+ policy_name = "pokemon_vl_qwen3_vl"
+ trace_format = "full"
+ return_trace = true
+
+ [eval.policy_config]
+ provider = "synth" # Use Synth internal API for vision models
+ model = "Qwen/Qwen3-VL-8B-Instruct" # Vision-capable Qwen3-VL model
+ inference_url = "http://localhost:8000/api/inference/v1/chat/completions"
+ temperature = 1.0 # Higher temperature to encourage tool calling
+ top_p = 0.95
+ max_tokens = 4096
+ use_vision = true
+ image_only_mode = false
+ max_llm_calls = 10
+
+ [eval.env_config.env_params]
+ max_steps_per_episode = 10
examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml ADDED
@@ -0,0 +1,24 @@
+ [eval]
+ app_id = "pokemon_red"
+ task_app_url = "http://127.0.0.1:8914"
+ model = "fft:REPLACE-WITH-RL-JOB-ID" # Update with final RL job ID
+ seeds = [100, 101, 102, 103, 104, 105, 106, 107, 108, 109]
+ max_turns = 15 # Allow more steps for trained model
+ concurrency = 3
+ env_name = "pokemon_red"
+ policy_name = "pokemon_vl_rl_final"
+ trace_format = "full"
+ return_trace = true
+
+ [eval.policy_config]
+ provider = "synth"
+ model = "fft:REPLACE-WITH-RL-JOB-ID" # Update with final RL job ID
+ temperature = 0.1 # Lower temperature for evaluation
+ top_p = 0.9
+ max_tokens = 4096
+ use_vision = true
+ image_only_mode = false
+ max_llm_calls = 15
+
+ [eval.env_config.env_params]
+ max_steps_per_episode = 15
examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml ADDED
@@ -0,0 +1,10 @@
+ # Filter high-quality vision-language rollouts for SFT training
+ # Assumes traces stored in pokemon_vl_blog.db via eval commands
+
+ [filter]
+ db = "traces/v3/pokemon_vl_blog.db"
+ output = "examples/blog_posts/pokemon_vl/ft_data/pokemon_vl_high_reward.jsonl"
+ min_official_score = 0.3 # Require at least 30% completion (Pallet Town progression)
+ models = ["Qwen/Qwen3-VL-4B-Instruct"] # Vision models used for rollouts
+ shuffle = true
+ shuffle_seed = 42
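
Before pointing SFT at the filtered file, it can help to spot-check what the filter step produced. A hedged Python sketch that only assumes one JSON object per line in the output JSONL; the exact record schema is not assumed here.

```python
# Hedged helper sketch: count records and peek at top-level keys in the filter
# output before launching training. Not part of the synth-ai CLI.
import json
from collections import Counter
from pathlib import Path

path = Path("examples/blog_posts/pokemon_vl/ft_data/pokemon_vl_high_reward.jsonl")
key_counts: Counter[str] = Counter()
n = 0
with path.open() as fh:
    for line in fh:
        if not line.strip():
            continue
        record = json.loads(line)  # one JSON object per line
        key_counts.update(record.keys())
        n += 1

print(f"{n} records in {path.name}")
print("top-level keys seen:", dict(key_counts))
```
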
examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml ADDED
@@ -0,0 +1,42 @@
+ # Vision-Language RL: Continue training Qwen3-4B VL from SFT checkpoint
+ # Update task_url with deployed Modal task app URL
+ # Set model.source to the SFT job id from `uvx synth-ai train --type sft`
+
+ type = "rl"
+
+ [services]
+ task_url = "http://127.0.0.1:8914"
+
+ [compute]
+ gpu_type = "H100"
+ gpu_count = 8
+
+ [topology]
+ gpus_for_vllm = 4
+ gpus_for_training = 3
+ gpus_for_ref = 1
+
+ [vllm]
+ tensor_parallel_size = 4
+
+ [model]
+ source = "fft:REPLACE-WITH-SFT-JOB-ID" # Update with actual SFT job ID
+ label = "pokemon_vl_rl_blog"
+ supports_vision = true
+
+ [rollout]
+ max_turns = 10
+ episodes_per_batch = 64
+
+ [evaluation]
+ instances = 100
+ every_n_iters = 20
+ seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
+
+ [training]
+ log_interval = 1
+
+ [training.weight_sync]
+ enable = true
+ targets = ["policy"]
+ weight_sync_interval = 1
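
The topology in this config splits the 8 H100s into 4 for vLLM, 3 for training, and 1 for the reference model. A small Python sketch, our own sanity check rather than a documented synth-ai validation step, that confirms the split matches `compute.gpu_count` before submitting:

```python
# Hedged sketch: verify the GPU split declared in the RL TOML adds up.
# Requires Python 3.11+ for the standard-library tomllib module.
import tomllib
from pathlib import Path

cfg_path = Path("examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml")
cfg = tomllib.loads(cfg_path.read_text())

total = cfg["compute"]["gpu_count"]
topo = cfg["topology"]
split = topo["gpus_for_vllm"] + topo["gpus_for_training"] + topo["gpus_for_ref"]

assert split == total, f"topology uses {split} GPUs but compute.gpu_count is {total}"
# In this config tensor_parallel_size matches the GPUs reserved for vLLM (4 == 4).
assert cfg["vllm"]["tensor_parallel_size"] == topo["gpus_for_vllm"]
print(f"OK: {topo['gpus_for_vllm']} vLLM + {topo['gpus_for_training']} train "
      f"+ {topo['gpus_for_ref']} ref = {total} GPUs")
```
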
examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml ADDED
@@ -0,0 +1,40 @@
+ # Vision-Language Supervised Fine-Tuning: Qwen3-4B VL on filtered Pokémon rollouts
+ # Update the `data` path once `uvx synth-ai filter` produces your JSONL
+
+ [algorithm]
+ type = "offline"
+ method = "sft"
+ variety = "fft"
+
+ [job]
+ model = "Qwen/Qwen3-VL-4B-Instruct" # Vision-enabled Qwen3-VL model
+ data = "../ft_data/pokemon_vl_high_reward.jsonl"
+ poll_seconds = 1800
+
+ [compute]
+ gpu_type = "H100"
+ gpu_count = 4
+ nodes = 1
+
+ [data.topology]
+ container_count = 4
+
+ [training]
+ mode = "full_finetune"
+ use_qlora = false
+
+ [hyperparameters]
+ n_epochs = 2
+ world_size = 4
+ sequence_length = 4096 # Longer for vision tokens + text
+ per_device_batch = 2
+ gradient_accumulation_steps = 64
+ learning_rate = 8e-6
+ warmup_ratio = 0.03
+
+ [hyperparameters.parallelism]
+ use_deepspeed = true
+ deepspeed_stage = 3
+ fsdp = false
+ bf16 = true
+ fp16 = false
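
For context on the SFT hyperparameters above, the implied global batch size follows the usual per-device × gradient-accumulation × world-size rule. A short Python sketch, assuming that standard definition:

```python
# Hedged arithmetic sketch: effective (global) batch size implied by the
# hyperparameters in this config, under the usual definition.
per_device_batch = 2
gradient_accumulation_steps = 64
world_size = 4

effective_batch = per_device_batch * gradient_accumulation_steps * world_size
# 512 sequences (each up to 4096 tokens here) contribute to one optimizer step.
print(effective_batch)  # 512
```
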
examples/blog_posts/warming_up_to_rl/README.md ADDED
@@ -0,0 +1,158 @@
+ # Crafter: From Rollouts to RL with the Synth AI CLI
+
+ This playbook mirrors the original “Warming Up to RL” walkthrough, but swaps the bespoke scripts for the first-class `uvx synth-ai` helpers. Every step—from deploying the task app to filtering rollouts, fine-tuning, and bootstrapping RL—now uses the same CLI you’d reach for in production.
+
+ All commands assume you are inside the repository root and have `uv`/`uvx` available.
+
+ ---
+
+ ## 0. Prerequisites
+
+ 1. Install dependencies and authenticate once:
+ ```bash
+ uv pip install -e .
+ uvx synth-ai setup
+ ```
+ The setup wizard writes the required `SYNTH_API_KEY`, `ENVIRONMENT_API_KEY`, and local `.env` helpers.
+
+ 2. Copy the example secrets if you need a starter file:
+ ```bash
+ cp examples/warming_up_to_rl/.env.example .env
+ ```
+
+ 3. Export the path we use for trace capture (optional but keeps things tidy):
+ ```bash
+ export CRAFTER_TRACE_DB=traces/v3/crafter_blog.db
+ ```
+
+ ---
+
+ ## 1. Ship the Crafter Task App
+
+ Deploy the hosted Crafter environment once. The Modal URL that prints at the end is reused by eval, SFT, and RL.
+
+ ```bash
+ uvx synth-ai deploy grpo-crafter \
+ --runtime modal \
+ --modal-mode serve \
+ --name crafter-blogpost \
+ --env-file .env
+ ```
+
+ For local testing you can run:
+
+ ```bash
+ uvx synth-ai deploy grpo-crafter \
+ --runtime uvicorn \
+ --port 8001 \
+ --trace traces/v3 \
+ --env-file .env
+ ```
+
+ Copy the Modal URL (e.g. `https://your-app.modal.run`) and replace the `task_app_url` placeholders inside every config under `examples/blog_posts/warming_up_to_rl/configs/`.
+
+ ---
+
+ ## 2. Collect High-Quality Rollouts
+
+ We lean on large teacher models to produce demonstrations. The configs in `configs/` already request full traces so we retain chain-of-thought.
+
+ Groq Qwen3-32B (text-only prompt):
+ ```bash
+ uvx synth-ai eval grpo-crafter \
+ --config examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml \
+ --trace-db "${CRAFTER_TRACE_DB}"
+ ```
+
+ GPT-OSS-120B via Groq’s OpenAI-compatible endpoint (also text-only):
+ ```bash
+ uvx synth-ai eval grpo-crafter \
+ --config examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml \
+ --trace-db "${CRAFTER_TRACE_DB}"
+ ```
+
+ Both configs disable image attachments and rely on the textual observation renderer (`format_observation`) so Groq stays within its supported modalities. If you want to try other models, keep `use_vision = false` unless the provider explicitly supports image inputs.
+
+ ---
+
+ ## 3. Filter Into an SFT Dataset
+
+ Once traces are stored in `CRAFTER_TRACE_DB`, trim them down to the strongest trajectories:
+
+ ```bash
+ uvx synth-ai filter \
+ --config examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml
+ ```
+
+ The output JSONL lands in `ft_data/crafter_blog_high_reward.jsonl`, ready for supervised fine-tuning.
+
+ ---
+
+ ## 4. Fine-Tune Qwen3-4B with `uvx synth-ai train`
+
+ Update the dataset path (and optionally hyperparameters) in `train_sft_qwen4b.toml`, then launch:
+
+ ```bash
+ uvx synth-ai train \
+ --type sft \
+ --config examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml \
+ --env-file .env \
+ --poll
+ ```
+
+ Capture the returned job id (it looks like `fft:Qwen/Qwen3-4B:job_xxxxx`). We reuse that identifier in the evaluation and RL configs.
+ At any time you can list recently minted checkpoints with:
+
+ ```bash
+ uvx synth-ai status models
+ ```
+
+ The output table shows the canonical model name/ID alongside the source job.
+
+ ---
+
+ ## 5. Evaluate the Fine-Tuned Checkpoint
+
+ Replace both `REPLACE-WITH-SFT-JOB-ID` strings inside `eval_ft_qwen4b.toml`, then run:
+
+ ```bash
+ uvx synth-ai eval grpo-crafter \
+ --config examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml \
+ --trace-db "${CRAFTER_TRACE_DB}"
+ ```
+
+ This provides a clean, CLI-native comparison between the teacher rollouts and the fine-tuned model.
+
+ ---
+
+ ## 6. Kick Off RL from the Fine-Tuned Model
+
+ Point `train_rl_from_sft.toml` at the same Modal task app and set `model.source` to your SFT job id:
+
+ ```bash
+ uvx synth-ai train \
+ --type rl \
+ --config examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml \
+ --env-file .env \
+ --poll
+ ```
+
+ The CLI streams rollout and judge metrics in real time. When the run finishes, you can re-use the Stage 5 config (substituting the RL job id) to quantify the uplift.
+ If you lose track of the produced RL label or want to confirm the latest status, run:
+
+ ```bash
+ uvx synth-ai status jobs
+ uvx synth-ai status models
+ ```
+
+ The first command shows job completion state; the second surfaces model IDs you can plug into new eval configs.
+
+ ---
+
+ ## 7. Where to Go Next
+
+ - The original `examples/warming_up_to_rl` folder still contains deeper experiments (auto-curricula, modal renderers, etc.).
+ - Add more `eval_*.toml` configs to compare alternative judges or reward shaping strategies.
+ - Plug the filtered dataset into `uvx synth-ai files upload` if you want to share it with a teammate without copying JSONL around.
+
+ This directory now holds everything a blog post needs: configs, output locations, and the CLI entrypoints to reproduce the Crafter SFT → RL pipeline end-to-end.
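
The walkthrough above asks you to replace several placeholder strings (the Modal `task_app_url` and the `fft:` job ids) before launching eval or RL. A hedged helper sketch, not part of the synth-ai CLI, that flags any config still carrying a placeholder:

```python
# Hypothetical pre-flight check: scan the blog-post configs for placeholder
# strings that the README says must be replaced before running eval or RL.
from pathlib import Path

CONFIG_DIR = Path("examples/blog_posts/warming_up_to_rl/configs")
PLACEHOLDERS = ("REPLACE-WITH-SFT-JOB-ID", "REPLACE-WITH-RL-JOB-ID", "your-app.modal.run")

stale = [
    (path.name, token)
    for path in sorted(CONFIG_DIR.glob("*.toml"))
    for token in PLACEHOLDERS
    if token in path.read_text()
]

if stale:
    for name, token in stale:
        print(f"still contains {token}: {name}")
else:
    print("all placeholders replaced")
```
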
examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml ADDED
@@ -0,0 +1,25 @@
+ # Evaluate the finetuned Qwen3-4B checkpoint on Crafter.
+ # Replace model with the fft: job id returned by the SFT run.
+
+ [eval]
+ app_id = "grpo-crafter"
+ task_app_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
+ model = "fft:REPLACE-WITH-SFT-JOB-ID"
+ seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+ max_turns = 10
+ concurrency = 4
+ env_name = "crafter"
+ policy_name = "crafter-react"
+ trace_format = "compact"
+ return_trace = false
+
+ [eval.policy_config]
+ provider = "synth"
+ model = "fft:REPLACE-WITH-SFT-JOB-ID"
+ temperature = 0.2
+ top_p = 0.8
+ max_tokens = 512
+ use_vision = true
+ image_only_mode = false
+ max_llm_calls = 10
+ tool_choice = "auto"
examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml ADDED
@@ -0,0 +1,25 @@
+ [eval]
+ app_id = "grpo-crafter"
+ task_app_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
+ model = "qwen/qwen3-32b"
+ seeds = [ 0, 1, 2,]
+ max_turns = 10
+ concurrency = 1
+ env_name = "crafter"
+ policy_name = "crafter-react"
+ trace_format = "full"
+ return_trace = true
+
+ [eval.policy_config]
+ provider = "groq"
+ model = "qwen/qwen3-32b"
+ inference_url = "https://api.groq.com/openai"
+ temperature = 0.6
+ top_p = 0.95
+ max_tokens = 8192
+ use_vision = false
+ image_only_mode = false
+ max_llm_calls = 10
+
+ [eval.env_config.env_params]
+ max_steps_per_episode = 10
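
This config routes inference through Groq's OpenAI-compatible endpoint. For a quick connectivity check outside the task app, here is a hedged Python sketch using the `openai` client; the `/v1` suffix on the base URL and the `GROQ_API_KEY` variable are assumptions, not values taken from the config.

```python
# Hedged sketch: call the same Groq OpenAI-compatible endpoint directly.
# The task app normally makes this call for you during rollouts.
import os
from openai import OpenAI

client = OpenAI(
    base_url="https://api.groq.com/openai/v1",  # assumed /v1 suffix
    api_key=os.environ["GROQ_API_KEY"],         # assumed env var name
)

resp = client.chat.completions.create(
    model="qwen/qwen3-32b",
    messages=[{"role": "user", "content": "Say hello from Crafter."}],
    temperature=0.6,
    top_p=0.95,
    max_tokens=128,
)
print(resp.choices[0].message.content)
```
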
examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml ADDED
@@ -0,0 +1,29 @@
+ # Crafter rollout config for GPT-OSS-120B served from OpenAI-compatible APIs.
+ # Replace the task_app_url with your deployed Crafter task app URL.
+ # The run stores full traces so we can keep the LLM reasoning for fine-tuning.
+
+ [eval]
+ app_id = "grpo-crafter"
+ task_app_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
+ model = "openai/gpt-oss-120b"
+ seeds = [0, 1, 2]
+ max_turns = 10
+ concurrency = 1
+ env_name = "crafter"
+ policy_name = "crafter-react"
+ trace_format = "full"
+ return_trace = true
+
+ [eval.env_config]
+ env_params = { max_steps_per_episode = 10 }
+
+ [eval.policy_config]
+ provider = "groq"
+ model = "openai/gpt-oss-120b"
+ inference_url = "https://api.groq.com/openai"
+ temperature = 0.6
+ top_p = 0.9
+ max_tokens = 768
+ use_vision = false
+ image_only_mode = false
+ max_llm_calls = 10
examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml ADDED
@@ -0,0 +1,10 @@
+ # Filters Crafter traces into an instruction-tuning dataset.
+ # Assumes you stored rollouts in traces/v3/crafter_blog.db via `uvx synth-ai eval`.
+
+ [filter]
+ db = "traces/v3/crafter_blog.db"
+ output = "examples/blog_posts/warming_up_to_rl/ft_data/crafter_blog_high_reward.jsonl"
+ min_official_score = 0.1
+ models = ["qwen/qwen3-32b", "openai/gpt-oss-120b"]
+ shuffle = true
+ shuffle_seed = 42
examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml ADDED
@@ -0,0 +1,41 @@
+ # Continue training the finetuned Crafter policy with GRPO-style RL.
+ # Fill in task_url with your deployed task app and set model.source to the
+ # finetuned model id returned by `uvx synth-ai train --type sft`.
+
+ type = "rl"
+
+ [services]
+ task_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
+
+ [compute]
+ gpu_type = "H100"
+ gpu_count = 8
+
+ [topology]
+ gpus_for_vllm = 4
+ gpus_for_training = 3
+ gpus_for_ref = 1
+
+ [vllm]
+ tensor_parallel_size = 4
+
+ [model]
+ source = "fft:REPLACE-WITH-SFT-JOB-ID"
+ label = "crafter-rl-blogpost"
+
+ [rollout]
+ max_turns = 10
+ episodes_per_batch = 64
+
+ [evaluation]
+ instances = 100
+ every_n_iters = 20
+ seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
+
+ [training]
+ log_interval = 1
+
+ [training.weight_sync]
+ enable = true
+ targets = ["policy"]
+ weight_sync_interval = 1
examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml ADDED
@@ -0,0 +1,40 @@
+ # Full-finetune Qwen3-4B on filtered Crafter traces.
+ # Update the `data` path once `uvx synth-ai filter` produces your JSONL.
+
+ [algorithm]
+ type = "offline"
+ method = "sft"
+ variety = "fft"
+
+ [job]
+ model = "Qwen/Qwen3-4B"
+ data = "../ft_data/crafter_blog_high_reward.jsonl"
+ poll_seconds = 1800
+
+ [compute]
+ gpu_type = "H100"
+ gpu_count = 4
+ nodes = 1
+
+ [data.topology]
+ container_count = 4
+
+ [training]
+ mode = "full_finetune"
+ use_qlora = false
+
+ [hyperparameters]
+ n_epochs = 2
+ world_size = 4
+ sequence_length = 2048
+ per_device_batch = 2
+ gradient_accumulation_steps = 64
+ learning_rate = 8e-6
+ warmup_ratio = 0.03
+
+ [hyperparameters.parallelism]
+ use_deepspeed = true
+ deepspeed_stage = 3
+ fsdp = false
+ bf16 = true
+ fp16 = false
examples/dev/qwen3_32b_qlora_4xh100.toml CHANGED
@@ -1,3 +1,8 @@
+ [algorithm]
+ type = "offline"
+ method = "sft"
+ variety = "qlora"
+
  [job]
  model = "Qwen/Qwen3-32B"
  # Optionally set here; you can also pass --dataset