synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic.

Files changed (354)
  1. examples/README.md +1 -0
  2. examples/analyze_semantic_words.sh +2 -2
  3. examples/blog_posts/pokemon_vl/README.md +98 -0
  4. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
  5. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  6. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  7. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
  8. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  9. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  10. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  11. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  12. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  13. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  14. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
  15. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  16. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  17. examples/multi_step/SFT_README.md +147 -0
  18. examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
  19. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
  20. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
  21. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
  22. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  23. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  24. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  25. examples/multi_step/convert_traces_to_sft.py +84 -0
  26. examples/multi_step/run_sft_qwen30b.sh +45 -0
  27. examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
  28. examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
  29. examples/qwen_coder/configs/coder_lora_small.toml +1 -2
  30. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  31. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  32. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  33. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  34. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  35. examples/qwen_vl/QUICKSTART.md +327 -0
  36. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  37. examples/qwen_vl/README.md +152 -0
  38. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  39. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  40. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  41. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  42. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  43. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  44. examples/qwen_vl/__init__.py +2 -0
  45. examples/qwen_vl/collect_data_via_cli.md +415 -0
  46. examples/qwen_vl/collect_vision_traces.py +368 -0
  47. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  48. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  49. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  50. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  51. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  52. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  53. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  54. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  55. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  56. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  57. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  58. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  59. examples/qwen_vl/run_vision_comparison.sh +61 -0
  60. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  61. examples/qwen_vl/test_image_validation.py +201 -0
  62. examples/qwen_vl/test_sft_vision_data.py +110 -0
  63. examples/rl/README.md +6 -6
  64. examples/rl/configs/eval_base_qwen.toml +17 -0
  65. examples/rl/configs/eval_rl_qwen.toml +13 -0
  66. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  67. examples/rl/configs/rl_from_base_qwen17.toml +79 -0
  68. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  69. examples/rl/run_eval.py +436 -0
  70. examples/rl/run_rl_and_save.py +111 -0
  71. examples/rl/task_app/README.md +21 -0
  72. examples/rl/task_app/math_single_step.py +990 -0
  73. examples/rl/task_app/math_task_app.py +111 -0
  74. examples/run_crafter_demo.sh +2 -2
  75. examples/sft/README.md +6 -6
  76. examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
  77. examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
  78. examples/sft/evaluate.py +2 -4
  79. examples/sft/export_dataset.py +7 -4
  80. examples/swe/task_app/README.md +33 -3
  81. examples/swe/task_app/grpo_swe_mini.py +4 -1
  82. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  83. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  84. examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
  85. examples/swe/task_app/hosted/inference/openai_client.py +4 -4
  86. examples/swe/task_app/hosted/policy_routes.py +0 -2
  87. examples/swe/task_app/hosted/rollout.py +0 -8
  88. examples/swe/task_app/morph_backend.py +178 -0
  89. examples/task_apps/crafter/task_app/README.md +1 -1
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
  91. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
  97. examples/task_apps/enron/__init__.py +1 -0
  98. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  99. examples/task_apps/math/README.md +1 -2
  100. examples/task_apps/pokemon_red/README.md +3 -4
  101. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  102. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  103. examples/task_apps/pokemon_red/task_app.py +36 -5
  104. examples/task_apps/sokoban/README.md +2 -3
  105. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  106. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  107. examples/vlm/README.md +3 -3
  108. examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
  109. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  110. examples/vlm/filter_image_rows.py +1 -1
  111. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  112. examples/warming_up_to_rl/_utils.py +92 -0
  113. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  114. examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
  115. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  116. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  117. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  118. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  119. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  120. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  121. examples/warming_up_to_rl/readme.md +63 -132
  122. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  123. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  124. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  125. examples/warming_up_to_rl/task_app/README.md +42 -0
  126. examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
  127. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  128. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  129. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  130. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  131. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  132. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  133. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  134. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  135. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  136. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  137. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  138. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  139. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  147. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  148. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  152. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  153. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
  154. synth_ai/__init__.py +44 -30
  155. synth_ai/_utils/__init__.py +47 -0
  156. synth_ai/_utils/base_url.py +10 -0
  157. synth_ai/_utils/http.py +10 -0
  158. synth_ai/_utils/prompts.py +10 -0
  159. synth_ai/_utils/task_app_state.py +12 -0
  160. synth_ai/_utils/user_config.py +10 -0
  161. synth_ai/api/models/supported.py +144 -7
  162. synth_ai/api/train/__init__.py +13 -1
  163. synth_ai/api/train/builders.py +9 -3
  164. synth_ai/api/train/cli.py +155 -17
  165. synth_ai/api/train/config_finder.py +18 -11
  166. synth_ai/api/train/configs/__init__.py +8 -1
  167. synth_ai/api/train/configs/rl.py +32 -7
  168. synth_ai/api/train/configs/sft.py +6 -2
  169. synth_ai/api/train/configs/shared.py +59 -2
  170. synth_ai/api/train/env_resolver.py +13 -10
  171. synth_ai/auth/credentials.py +119 -0
  172. synth_ai/cli/__init__.py +61 -69
  173. synth_ai/cli/_modal_wrapper.py +7 -5
  174. synth_ai/cli/_typer_patch.py +0 -2
  175. synth_ai/cli/_validate_task_app.py +22 -4
  176. synth_ai/cli/commands/__init__.py +17 -0
  177. synth_ai/cli/commands/demo/__init__.py +6 -0
  178. synth_ai/cli/commands/demo/core.py +163 -0
  179. synth_ai/cli/commands/deploy/__init__.py +23 -0
  180. synth_ai/cli/commands/deploy/core.py +614 -0
  181. synth_ai/cli/commands/deploy/errors.py +72 -0
  182. synth_ai/cli/commands/deploy/validation.py +11 -0
  183. synth_ai/cli/commands/eval/__init__.py +19 -0
  184. synth_ai/cli/commands/eval/core.py +1109 -0
  185. synth_ai/cli/commands/eval/errors.py +81 -0
  186. synth_ai/cli/commands/eval/validation.py +133 -0
  187. synth_ai/cli/commands/filter/__init__.py +12 -0
  188. synth_ai/cli/commands/filter/core.py +388 -0
  189. synth_ai/cli/commands/filter/errors.py +55 -0
  190. synth_ai/cli/commands/filter/validation.py +77 -0
  191. synth_ai/cli/commands/help/__init__.py +177 -0
  192. synth_ai/cli/commands/help/core.py +73 -0
  193. synth_ai/cli/commands/status/__init__.py +64 -0
  194. synth_ai/cli/commands/status/client.py +192 -0
  195. synth_ai/cli/commands/status/config.py +92 -0
  196. synth_ai/cli/commands/status/errors.py +20 -0
  197. synth_ai/cli/commands/status/formatters.py +164 -0
  198. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  199. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  200. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  201. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  202. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  203. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  204. synth_ai/cli/commands/status/utils.py +114 -0
  205. synth_ai/cli/commands/train/__init__.py +53 -0
  206. synth_ai/cli/commands/train/core.py +21 -0
  207. synth_ai/cli/commands/train/errors.py +117 -0
  208. synth_ai/cli/commands/train/judge_schemas.py +199 -0
  209. synth_ai/cli/commands/train/judge_validation.py +304 -0
  210. synth_ai/cli/commands/train/validation.py +443 -0
  211. synth_ai/cli/demo.py +2 -162
  212. synth_ai/cli/deploy/__init__.py +28 -0
  213. synth_ai/cli/deploy/core.py +5 -0
  214. synth_ai/cli/deploy/errors.py +23 -0
  215. synth_ai/cli/deploy/validation.py +5 -0
  216. synth_ai/cli/eval/__init__.py +36 -0
  217. synth_ai/cli/eval/core.py +5 -0
  218. synth_ai/cli/eval/errors.py +31 -0
  219. synth_ai/cli/eval/validation.py +5 -0
  220. synth_ai/cli/filter/__init__.py +28 -0
  221. synth_ai/cli/filter/core.py +5 -0
  222. synth_ai/cli/filter/errors.py +23 -0
  223. synth_ai/cli/filter/validation.py +5 -0
  224. synth_ai/cli/legacy_root_backup.py +3 -1
  225. synth_ai/cli/lib/__init__.py +10 -0
  226. synth_ai/cli/lib/task_app_discovery.py +7 -0
  227. synth_ai/cli/lib/task_app_env.py +518 -0
  228. synth_ai/cli/modal_serve/__init__.py +12 -0
  229. synth_ai/cli/modal_serve/core.py +14 -0
  230. synth_ai/cli/modal_serve/errors.py +8 -0
  231. synth_ai/cli/modal_serve/validation.py +11 -0
  232. synth_ai/cli/recent.py +2 -1
  233. synth_ai/cli/serve/__init__.py +12 -0
  234. synth_ai/cli/serve/core.py +14 -0
  235. synth_ai/cli/serve/errors.py +8 -0
  236. synth_ai/cli/serve/validation.py +11 -0
  237. synth_ai/cli/setup.py +21 -0
  238. synth_ai/cli/status.py +7 -126
  239. synth_ai/cli/task_app_deploy.py +7 -0
  240. synth_ai/cli/task_app_list.py +25 -0
  241. synth_ai/cli/task_app_modal_serve.py +11 -0
  242. synth_ai/cli/task_app_serve.py +11 -0
  243. synth_ai/cli/task_apps.py +110 -1499
  244. synth_ai/cli/traces.py +1 -1
  245. synth_ai/cli/train/__init__.py +12 -0
  246. synth_ai/cli/train/core.py +21 -0
  247. synth_ai/cli/train/errors.py +8 -0
  248. synth_ai/cli/train/validation.py +24 -0
  249. synth_ai/cli/train.py +5 -0
  250. synth_ai/cli/turso.py +1 -1
  251. synth_ai/cli/watch.py +1 -1
  252. synth_ai/demos/__init__.py +10 -0
  253. synth_ai/demos/core/__init__.py +28 -1
  254. synth_ai/demos/crafter/__init__.py +1 -0
  255. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  256. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  257. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  258. synth_ai/demos/demo_registry.py +176 -0
  259. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  260. synth_ai/demos/math/__init__.py +1 -0
  261. synth_ai/demos/math/_common.py +16 -0
  262. synth_ai/demos/math/app.py +38 -0
  263. synth_ai/demos/math/config.toml +76 -0
  264. synth_ai/demos/math/deploy_modal.py +54 -0
  265. synth_ai/demos/math/modal_task_app.py +702 -0
  266. synth_ai/demos/math/task_app_entry.py +51 -0
  267. synth_ai/environments/environment/core.py +7 -1
  268. synth_ai/environments/examples/bandit/engine.py +0 -1
  269. synth_ai/environments/examples/bandit/environment.py +0 -1
  270. synth_ai/environments/examples/red/engine.py +33 -12
  271. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  272. synth_ai/environments/examples/red/environment.py +26 -0
  273. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  274. synth_ai/environments/examples/wordle/environment.py +0 -1
  275. synth_ai/evals/base.py +16 -5
  276. synth_ai/evals/client.py +1 -1
  277. synth_ai/http.py +8 -22
  278. synth_ai/inference/client.py +1 -1
  279. synth_ai/judge_schemas.py +4 -5
  280. synth_ai/learning/client.py +1 -1
  281. synth_ai/learning/health.py +1 -1
  282. synth_ai/learning/jobs.py +1 -1
  283. synth_ai/learning/rl/client.py +4 -2
  284. synth_ai/learning/rl/env_keys.py +1 -1
  285. synth_ai/learning/rl/secrets.py +1 -1
  286. synth_ai/learning/sft/client.py +1 -1
  287. synth_ai/learning/sft/data.py +407 -4
  288. synth_ai/learning/validators.py +4 -1
  289. synth_ai/streaming/__init__.py +29 -0
  290. synth_ai/streaming/config.py +94 -0
  291. synth_ai/streaming/handlers.py +469 -0
  292. synth_ai/streaming/streamer.py +301 -0
  293. synth_ai/streaming/types.py +95 -0
  294. synth_ai/task/apps/__init__.py +4 -2
  295. synth_ai/task/config.py +6 -4
  296. synth_ai/task/rubrics/__init__.py +1 -2
  297. synth_ai/task/rubrics/loaders.py +14 -10
  298. synth_ai/task/rubrics.py +219 -0
  299. synth_ai/task/trace_correlation_helpers.py +24 -11
  300. synth_ai/task/tracing_utils.py +14 -3
  301. synth_ai/task/validators.py +0 -1
  302. synth_ai/tracing_v3/abstractions.py +3 -3
  303. synth_ai/tracing_v3/config.py +15 -13
  304. synth_ai/tracing_v3/constants.py +21 -0
  305. synth_ai/tracing_v3/db_config.py +3 -1
  306. synth_ai/tracing_v3/decorators.py +10 -7
  307. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  308. synth_ai/tracing_v3/migration_helper.py +1 -2
  309. synth_ai/tracing_v3/session_tracer.py +7 -7
  310. synth_ai/tracing_v3/storage/base.py +29 -29
  311. synth_ai/tracing_v3/storage/config.py +3 -3
  312. synth_ai/tracing_v3/turso/daemon.py +8 -9
  313. synth_ai/tracing_v3/turso/native_manager.py +80 -72
  314. synth_ai/tracing_v3/utils.py +2 -2
  315. synth_ai/utils/__init__.py +101 -0
  316. synth_ai/utils/base_url.py +94 -0
  317. synth_ai/utils/cli.py +131 -0
  318. synth_ai/utils/env.py +294 -0
  319. synth_ai/utils/http.py +172 -0
  320. synth_ai/utils/modal.py +308 -0
  321. synth_ai/utils/process.py +212 -0
  322. synth_ai/utils/prompts.py +39 -0
  323. synth_ai/utils/sqld.py +122 -0
  324. synth_ai/utils/task_app_discovery.py +882 -0
  325. synth_ai/utils/task_app_env.py +186 -0
  326. synth_ai/utils/task_app_state.py +318 -0
  327. synth_ai/utils/user_config.py +137 -0
  328. synth_ai/v0/config/__init__.py +1 -5
  329. synth_ai/v0/config/base_url.py +1 -7
  330. synth_ai/v0/tracing/config.py +1 -1
  331. synth_ai/v0/tracing/decorators.py +1 -1
  332. synth_ai/v0/tracing/upload.py +1 -1
  333. synth_ai/v0/tracing_v1/config.py +1 -1
  334. synth_ai/v0/tracing_v1/decorators.py +1 -1
  335. synth_ai/v0/tracing_v1/upload.py +1 -1
  336. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
  337. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
  338. synth_ai/cli/man.py +0 -106
  339. synth_ai/cli/tui.py +0 -57
  340. synth_ai/compound/cais.py +0 -0
  341. synth_ai/core/experiment.py +0 -13
  342. synth_ai/core/system.py +0 -15
  343. synth_ai/demo_registry.py +0 -295
  344. synth_ai/handshake.py +0 -109
  345. synth_ai/tui/__init__.py +0 -5
  346. synth_ai/tui/__main__.py +0 -13
  347. synth_ai/tui/cli/__init__.py +0 -1
  348. synth_ai/tui/cli/query_experiments.py +0 -164
  349. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  350. synth_ai/tui/dashboard.py +0 -906
  351. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
  352. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
  353. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
  354. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
examples/multi_step/SFT_README.md
@@ -0,0 +1,147 @@
+ # SFT Training for Qwen3-Coder-30B with LoRA
+
+ Supervised Fine-Tuning configuration for the same 30B MoE model used in RL training.
+
+ ## Configuration Overview
+
+ **Model:** `Qwen/Qwen3-Coder-30B-A3B-Instruct` (Mixture of Experts)
+
+ **Hardware:** 4x H200 GPUs (561GB total VRAM)
+
+ **Parallelism Strategy:**
+ - **Tensor Parallel (TP)**: 2 GPUs - Splits the model across 2 GPUs for inference/forward pass
+ - **Data Parallel (DP)**: 2 GPUs - Splits batches across 2 GPUs for training throughput
+
+ **LoRA Configuration:**
+ - Rank (r): 16
+ - Alpha: 32
+ - Dropout: 0.05
+ - Target modules: `["all-linear"]` - Applies LoRA to all linear layers
+
+ ## Memory Breakdown per GPU
+
+ With 4x H200 (141GB each):
+
+ **Model Split (TP=2):**
+ - 2 GPUs hold the base model (70GB each)
+ - ~70GB free per GPU for activations and gradients
+
+ **Training (DP=2):**
+ - 2 GPUs process different batches
+ - LoRA adapters: ~5-10GB per GPU
+ - Gradients/optimizer states: ~20-30GB per GPU
+ - **Total per training GPU: ~50-60GB** ✅
+
+ ## Quick Start
+
+ ### 1. Prepare Your Dataset
+
+ Your dataset should be in JSONL format with conversation turns:
+
+ ```jsonl
+ {"messages": [{"role": "system", "content": "..."}, {"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]}
+ {"messages": [{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]}
+ ```
+
+ ### 2. Run Training
+
+ ```bash
+ # Using the helper script
+ ./examples/multi_step/run_sft_qwen30b.sh path/to/your/dataset.jsonl
+
+ # Or directly with synth-ai CLI
+ uvx synth-ai train \
+   --type sft \
+   --config examples/multi_step/configs/crafter_sft_qwen30b_lora.toml \
+   --dataset path/to/your/dataset.jsonl \
+   --env-file backend/.env.dev
+ ```
+
+ ### 3. Monitor Training
+
+ Check the Synth dashboard for:
+ - Training loss curve
+ - Validation metrics (if validation set provided)
+ - GPU utilization
+ - Training throughput (tokens/sec)
+
+ ## Hyperparameters
+
+ **Batch Configuration:**
+ - Per-device batch size: 1
+ - Gradient accumulation: 64 steps
+ - **Effective global batch size: 128** (1 × 64 × 2 GPUs)
+
+ **Learning Rate:**
+ - Initial LR: 5e-6
+ - Warmup ratio: 3%
+ - Schedule: Linear decay
+
+ **Sequence Length:** 4096 tokens
+
+ **Training:**
+ - Epochs: 1
+ - Mixed precision: BF16
+ - DeepSpeed: Stage 2 (optimizer state sharding)
+ - Activation checkpointing: Enabled
+
+ ## Configuration File Structure
+
+ ```toml
+ [algorithm]
+ type = "offline" # Supervised (not RL)
+ method = "sft" # Supervised fine-tuning
+ variety = "lora" # Using LoRA adapters
+
+ [compute]
+ gpu_type = "H200"
+ gpu_count = 4
+
+ [data.topology]
+ tensor_parallel = 2 # Split model across 2 GPUs
+ data_parallel = 2 # Split batches across 2 GPUs
+
+ [training]
+ mode = "lora"
+ use_qlora = true # Quantized LoRA (4-bit base model)
+
+ [lora]
+ r = 16 # LoRA rank
+ alpha = 32 # LoRA scaling
+ dropout = 0.05
+ target_modules = ["all-linear"] # Apply to all linear layers
+ ```
+
+ ## Comparison with RL Config
+
+ | Aspect | SFT | RL |
+ |--------|-----|-----|
+ | Purpose | Supervised learning | Reinforcement learning |
+ | Data | Labeled examples | Environment interactions |
+ | Topology | TP=2, DP=2 | Split: 2 inference + 2 training |
+ | Batch size | 128 (effective) | Variable (episode-based) |
+ | Training | Standard backprop | Policy gradient (GSPO) |
+
+ ## Tips
+
+ 1. **Start Small:** Test with a small dataset first to verify the pipeline
+ 2. **Validation:** Add a validation set to monitor overfitting
+ 3. **Checkpointing:** Training saves checkpoints every 100 steps
+ 4. **Resume:** Can resume from checkpoint if training is interrupted
+ 5. **Inference:** After training, use the LoRA adapter with the base model
+
+ ## Output
+
+ After training completes, you'll get:
+ - LoRA adapter weights (saved to volume)
+ - Training metrics and logs
+ - Best checkpoint (based on validation loss)
+ - Model ready for inference or RL initialization
+
+ ## Next Steps
+
+ 1. **Evaluate:** Test your fine-tuned model on held-out data
+ 2. **RL Training:** Use this as initialization for RL (`init_from_sft = true`)
+ 3. **Deploy:** Load LoRA adapter for inference
+ 4. **Iterate:** Adjust hyperparameters based on performance
+
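The new SFT_README.md above specifies the dataset layout: one JSON object per line, each with a `messages` list of role/content turns. A minimal sketch for sanity-checking a file against that layout before launching a job; the script itself is illustrative and not shipped in the wheel:

```python
import json
import sys

VALID_ROLES = {"system", "user", "assistant"}

def check_sft_jsonl(path: str) -> None:
    """Verify each line matches the {"messages": [{"role": ..., "content": ...}]} layout."""
    with open(path, encoding="utf-8") as fh:
        for lineno, line in enumerate(fh, start=1):
            if not line.strip():
                continue  # tolerate blank lines
            record = json.loads(line)  # raises on malformed JSON
            messages = record.get("messages")
            if not isinstance(messages, list) or not messages:
                raise ValueError(f"line {lineno}: expected a non-empty 'messages' list")
            for msg in messages:
                if msg.get("role") not in VALID_ROLES:
                    raise ValueError(f"line {lineno}: unexpected role {msg.get('role')!r}")
                if not isinstance(msg.get("content"), str):
                    raise ValueError(f"line {lineno}: 'content' must be a string")
    print(f"{path}: OK")

if __name__ == "__main__":
    check_sft_jsonl(sys.argv[1])
```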
examples/multi_step/configs/crafter_rl_outcome.toml
@@ -6,7 +6,7 @@ method = "policy_gradient"
  variety = "gspo"
 
  [services]
- # Replace with the Modal URL printed by `uvx synth-ai modal-serve grpo-crafter`
+ # Replace with the Modal URL printed by `uvx synth-ai deploy --runtime modal --modal-mode serve grpo-crafter`
  task_url = "https://YOUR-MODAL-TASK-APP.modal.run"
 
  [compute]
examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml
@@ -1,39 +1,32 @@
- # Crafter RL experiment – stepwise shaping with hosted judge rubrics
- #
- # This configuration extends the stepwise LoRA baseline by wiring the Synth judge
- # service so evaluation rolls combine dense step rewards with hosted rubric scoring.
-
  [algorithm]
  type = "online"
  method = "policy_gradient"
  variety = "gspo"
 
  [services]
- # Replace with the Modal URL printed by `uvx synth-ai modal-serve grpo-crafter`
  task_url = "https://YOUR-MODAL-TASK-APP.modal.run"
- # Point at the Synth backend (or compatible service) that exposes /api/judge/v1/*
  judge_url = "https://synth-backend-dev-docker.onrender.com/api"
 
  [compute]
  gpu_type = "H200"
- gpu_count = 2
+ gpu_count = 4
 
  [topology]
  type = "single_node_split"
- gpus_for_vllm = 1
- gpus_for_training = 1
+ gpus_for_vllm = 2
+ gpus_for_training = 2
  gpus_for_ref = 0
- tensor_parallel = 1
+ tensor_parallel = 2
 
  [vllm]
- tensor_parallel_size = 1
- max_model_len = 8192
+ tensor_parallel_size = 2
+ max_model_len = 4096
 
  [reference]
  placement = "none"
 
  [model]
- base = "Qwen/Qwen3-4B"
+ base = "Qwen/Qwen3-Coder-30B-A3B-Instruct"
  trainer_mode = "lora"
  label = "crafter-rl-stepwise-hosted-judge"
 
@@ -41,7 +34,7 @@ label = "crafter-rl-stepwise-hosted-judge"
  r = 16
  alpha = 32
  dropout = 0.05
- target_modules = ["all-linear"]
+ target_modules = [ "all-linear",]
 
  [rollout]
  env_name = "crafter"
@@ -50,31 +43,16 @@ episodes_per_batch = 2
  policy_name = "crafter-react"
  max_concurrent_rollouts = 8
  batches_per_step = 2
- ops = ["agent", "env"]
-
- [rollout.env_config]
- difficulty = "easy"
-
- [rollout.env_config.step_rewards]
- enabled = true
- mode = "decision_stepwise"
- strategy = "consistent" # +1 for each decision that unlocks a new achievement
- indicator_lambda = 1.0
- step_beta = 0.0
-
- [rollout.policy_config]
- temperature = 0.2
- top_p = 0.95
- max_tokens = 512
+ ops = [ "agent", "env",]
 
  [evaluation]
  instances = 16
  every_n_iters = 10
- seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+ seeds = [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,]
 
  [training]
  num_epochs = 1
- iterations_per_epoch = 20
+ iterations_per_epoch = 5
  gradient_accumulation_steps = 1
  max_accumulated_minibatch = 1
  max_turns = 10
@@ -84,104 +62,84 @@ learning_rate = 5e-5
  log_interval = 1
  weight_sync_interval = 1
  event_rewards_kind = "unique"
- async_semaphore_max = 40 # Max concurrent rollouts in streaming pipeline
-
- # Enable dense decision rewards in the trainer to mirror env_config step rewards.
+ async_semaphore_max = 4
  step_rewards_enabled = true
  step_rewards_mode = "decision_stepwise"
  step_rewards_indicator_lambda = 1.0
  step_rewards_beta = 0.0
  step_rewards_strategy = "consistent"
 
+ [rubric]
+ enabled = true
+
+ [rollout.env_config]
+ difficulty = "easy"
+
+ [rollout.policy_config]
+ temperature = 0.2
+ top_p = 0.95
+ max_tokens = 512
+
  [training.weight_sync]
  enable = true
- targets = ["policy"]
+ targets = [ "policy",]
  mode = "direct"
  direct = true
  verify_every_k = 0
 
- [rubric]
- enabled = true
- model = "openai/gpt-oss-120b"
- api_base = "https://synth-backend-dev-docker.onrender.com/api/judge"
- api_key_env = "OPENAI_API_KEY"
- # Blend the hosted judge scores with environment returns inside the trainer.
  [rubric.weights]
  env = 0.2
  event = 0.4
  outcome = 0.4
 
- [rubric.event]
- # Hosted judge rubric for per-decision progress scoring.
- rubric_id = "crafter/event@v1"
- criteria = [
- { key = "progress.unique_achievements", weight = 0.9, description = "Return 1 when this decision explicitly unlocks a brand-new Crafter achievement (inventory or status text confirms it this turn). Otherwise return 0.", aggregation = "weighted_sum" },
- { key = "process.intent_alignment", weight = 0.1, description = "Use at most 0.3 to acknowledge tightly coupled setup that finishes the last prerequisite; keep ≤0.1 when the agent only repositions or gathers without an imminent unlock.", aggregation = "weighted_sum" },
- ]
-
- [rubric.outcome]
- # Hosted judge rubric for final trajectory scoring.
- rubric_id = "crafter/outcome@v1"
- criteria = [
- { key = "outcome.goal_completion", weight = 0.6, description = "Full credit when the agent ends with strong survival metrics and a clear crafted milestone (e.g., iron tools, furnace).", aggregation = "weighted_sum" },
- { key = "outcome.achievement_depth", weight = 0.4, description = "Partial credit for intermediate achievements (saplings, wood/stone tools) that set up future success.", aggregation = "weighted_sum" },
- ]
-
- [judge]
- type = "groq" # or "groq" when routing to Groq-hosted judges
+ [judge.options]
+ event = true
+ outcome = true
+ provider = "openai"
+ model = "openai/gpt-oss-120b"
+ rubric_id = "crafter/bundle@v1"
  timeout_s = 45
 
- [judge.options]
- event = true
- outcome = true
- provider = "openai"
- model = "openai/gpt-oss-120b"
- rubric_id = "crafter/bundle@v1"
- max_concurrency = 6
- tracks = ["process", "reasoning", "progress", "outcome"]
-
- [judge.options.rubric_overrides]
-
- [judge.options.rubric_overrides.event]
- goal_text = """
- Treat each decision as a check for new Crafter achievements.
- Award the top score only when the log shows a fresh achievement unlock or an immediately verifiable deterministic completion.
- Keep otherwise useful setup actions in a narrow low band so non-achievement turns stay near zero."""
- aggregation = "weighted_sum"
-
- [[judge.options.rubric_overrides.event.criteria]]
- id = "progress.unique_achievements"
- weight = 0.9
- scale = "binary"
- description = "Return 1 when this decision explicitly unlocks a brand-new Crafter achievement (inventory or status text confirms it this turn). Otherwise return 0."
-
- [[judge.options.rubric_overrides.event.criteria]]
- id = "process.intent_alignment"
- weight = 0.1
- scale = "bounded"
- description = "Use at most 0.3 to acknowledge tightly coupled setup that finishes the last prerequisite; keep ≤0.1 when the agent only repositions or gathers without an imminent unlock."
-
- [judge.options.rubric_overrides.outcome]
- goal_text = """
- Summarise the episode outcome in relation to Crafter’s win condition:
- survive, accumulate resources, and craft advanced tools or structures.
- Highlight notable achievements, safety failures, and preparedness for future exploration."""
- aggregation = "weighted_sum"
-
- [[judge.options.rubric_overrides.outcome.criteria]]
- id = "outcome.goal_completion"
- weight = 0.6
- scale = "binary"
- description = "Full credit when the agent ends with strong survival metrics and a clear crafted milestone (e.g., iron tools, furnace)."
-
- [[judge.options.rubric_overrides.outcome.criteria]]
- id = "outcome.achievement_depth"
- weight = 0.4
- scale = "bounded"
- description = "Partial credit for intermediate achievements (saplings, wood/stone tools) that set up future success."
-
- [judge.options.weights]
- process = 0.05
- reasoning = 0.15
- progress = 0.30
- outcome = 0.50
+ [rollout.env_config.step_rewards]
+ enabled = true
+ mode = "decision_stepwise"
+ strategy = "consistent"
+ indicator_lambda = 1.0
+ step_beta = 0.0
+
+ [judge.options.weights]
+ process = 0.05
+ reasoning = 0.15
+ progress = 0.3
+ outcome = 0.5
+
+ [judge.options.rubric_overrides.event]
+ goal_text = "Treat each decision as a check for new Crafter achievements.\nAward the top score only when the log shows a fresh achievement unlock or an immediately verifiable deterministic completion.\nKeep otherwise useful setup actions in a narrow low band so non-achievement turns stay near zero."
+ aggregation = "weighted_sum"
+ [[judge.options.rubric_overrides.event.criteria]]
+ id = "progress.unique_achievements"
+ weight = 0.9
+ scale = "binary"
+ description = "Return 1 when this decision explicitly unlocks a brand-new Crafter achievement (inventory or status text confirms it this turn). Otherwise return 0."
+
+ [[judge.options.rubric_overrides.event.criteria]]
+ id = "process.intent_alignment"
+ weight = 0.1
+ scale = "bounded"
+ description = "Use at most 0.3 to acknowledge tightly coupled setup that finishes the last prerequisite; keep ≤0.1 when the agent only repositions or gathers without an imminent unlock."
+
+ [judge.options.rubric_overrides.outcome]
+ goal_text = "Summarise the episode outcome in relation to Crafter’s win condition:\nsurvive, accumulate resources, and craft advanced tools or structures.\nHighlight notable achievements, safety failures, and preparedness for future exploration."
+ aggregation = "weighted_sum"
+ [[judge.options.rubric_overrides.outcome.criteria]]
+ id = "outcome.goal_completion"
+ weight = 0.6
+ scale = "binary"
+ description = "Full credit when the agent ends with strong survival metrics and a clear crafted milestone (e.g., iron tools, furnace)."
+
+ [[judge.options.rubric_overrides.outcome.criteria]]
+ id = "outcome.achievement_depth"
+ weight = 0.4
+ scale = "bounded"
+ description = "Partial credit for intermediate achievements (saplings, wood/stone tools) that set up future success."
+
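The rewritten config keeps the `[rubric.weights]` blend (env = 0.2, event = 0.4, outcome = 0.4) while moving judge settings under `[judge.options]`. The diff does not show how the trainer applies those weights; the sketch below only illustrates the weighted sum the values imply, and the helper name is hypothetical:

```python
# Illustrative only: a weighted blend of the three reward channels named in
# [rubric.weights]. The actual combination happens inside the Synth trainer
# and may differ from this plain weighted sum.
def blend_rewards(env_return: float, event_score: float, outcome_score: float,
                  weights: dict[str, float] | None = None) -> float:
    weights = weights or {"env": 0.2, "event": 0.4, "outcome": 0.4}
    return (
        weights["env"] * env_return
        + weights["event"] * event_score
        + weights["outcome"] * outcome_score
    )

# Example: env return 1.0, judged event score 0.5, judged outcome score 0.75
print(blend_rewards(1.0, 0.5, 0.75))  # 0.2*1.0 + 0.4*0.5 + 0.4*0.75 = 0.7
```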
examples/multi_step/configs/crafter_rl_stepwise_shaped.toml
@@ -6,7 +6,7 @@ method = "policy_gradient"
  variety = "gspo"
 
  [services]
- # Replace with the Modal URL printed by `uvx synth-ai modal-serve grpo-crafter`
+ # Replace with the Modal URL printed by `uvx synth-ai deploy --runtime modal --modal-mode serve grpo-crafter`
  task_url = "https://YOUR-MODAL-TASK-APP.modal.run"
 
  [compute]
examples/multi_step/configs/crafter_rl_stepwise_simple.toml
@@ -6,7 +6,7 @@ method = "policy_gradient"
  variety = "gspo"
 
  [services]
- # Replace with the Modal URL printed by `uvx synth-ai modal-serve grpo-crafter`
+ # Replace with the Modal URL printed by `uvx synth-ai deploy --runtime modal --modal-mode serve grpo-crafter`
  task_url = "https://YOUR-MODAL-TASK-APP.modal.run"
 
  [compute]
examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml
@@ -0,0 +1,105 @@
+ # Crafter RL experiment – simple stepwise rewards (1 point per new achievement)
+ # This config uses the NEW unified [policy] section format
+
+ [algorithm]
+ type = "online"
+ method = "policy_gradient"
+ variety = "gspo"
+
+ [services]
+ # Replace with the Modal URL printed by `uvx synth-ai deploy --runtime modal --modal-mode serve grpo-crafter`
+ task_url = "https://YOUR-MODAL-TASK-APP.modal.run"
+
+ [compute]
+ gpu_type = "H200"
+ gpu_count = 2
+
+ [compute.topology] # Nested: topology is part of compute
+ type = "single_node_split"
+ gpus_for_vllm = 1
+ gpus_for_training = 1
+ gpus_for_ref = 0
+ tensor_parallel = 1
+ reference_placement = "none" # Reference model placement
+
+ [vllm]
+ tensor_parallel_size = 1
+ max_model_len = 8192
+
+ [judge]
+ enabled = false # Set to true to enable judge/rubric scoring
+
+ # Uncomment to enable judge-based reward blending:
+ # enabled = true
+ # timeout_s = 45
+ #
+ # [judge.reward_blend] # How to blend env/event/outcome reward sources
+ # env = 0.2
+ # event = 0.4
+ # outcome = 0.4
+ #
+ # [judge.options]
+ # provider = "openai"
+ # model = "openai/gpt-oss-120b"
+ # event = true
+ # outcome = true
+ # max_concurrency = 6
+
+ # NEW: Unified [policy] section - single source of truth for model and sampling
+ [policy]
+ model_name = "Qwen/Qwen3-4B"
+ trainer_mode = "lora"
+ label = "crafter-rl-stepwise-simple"
+
+ # Sampling parameters for rollouts
+ max_tokens = 512
+ temperature = 0.6
+ top_p = 0.95
+
+ [rollout]
+ env_name = "crafter"
+ max_turns = 10
+ episodes_per_batch = 4
+ policy_name = "crafter-react"
+ max_concurrent_rollouts = 8
+ batches_per_step = 2
+ ops = ["agent", "env"]
+
+ [evaluation]
+ instances = 10
+ every_n_iters = 10
+ seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+
+ [training]
+ num_epochs = 1
+ iterations_per_epoch = 10
+ gradient_accumulation_steps = 1
+ max_accumulated_minibatch = 1
+ max_turns = 10
+ batch_size = 4
+ group_size = 4
+ learning_rate = 5e-5
+ log_interval = 1
+ weight_sync_interval = 1
+
+ [training.rewards] # Nested: Reward config under training
+ step_rewards_enabled = true
+ step_rewards_mode = "decision_stepwise"
+ step_rewards_indicator_lambda = 1.0
+ step_rewards_beta = 0.0
+ step_rewards_strategy = "consistent"
+ event_rewards_kind = "unique"
+
+ [training.lora] # Nested: LoRA config under training
+ r = 16
+ alpha = 32
+ dropout = 0.05
+ target_modules = ["all-linear"]
+
+ [training.weight_sync]
+ enable = true
+ targets = ["policy"]
+ mode = "direct"
+ direct = true
+ verify_every_k = 0
+
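This config's comments call out the new unified `[policy]` table as the single source of truth for the model and sampling parameters. A short sketch of reading that table with Python's standard-library TOML parser; the code that actually consumes it inside synth-ai is not shown in this diff:

```python
# Sketch: load the unified [policy] section with tomllib (Python 3.11+).
import tomllib

with open("examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml", "rb") as fh:
    cfg = tomllib.load(fh)

policy = cfg["policy"]
print(policy["model_name"])                                        # "Qwen/Qwen3-4B"
print({k: policy[k] for k in ("temperature", "top_p", "max_tokens")})
```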
examples/multi_step/configs/crafter_sft_qwen30b_lora.toml
@@ -0,0 +1,62 @@
+ # Crafter SFT LoRA configuration
+ # Train Qwen3-Coder-30B on Crafter agent traces
+
+ [algorithm]
+ type = "offline"
+ method = "sft"
+ variety = "lora"
+
+ [job]
+ model = "Qwen/Qwen3-Coder-30B-A3B-Instruct"
+ # Default dataset - can override with --dataset flag
+ data = "traces/crafter_sft_converted.jsonl"
+
+ [compute]
+ gpu_type = "H200"
+ gpu_count = 2
+ nodes = 1
+
+ [data]
+ # Forwarded into metadata.effective_config
+ topology = {}
+ # Optional validation set if you have one locally
+ # validation_path = "examples/multi_step/ft_data/crafter_sft.val.jsonl"
+
+ [training]
+ mode = "lora"
+ use_qlora = true
+
+ [training.validation]
+ enabled = true
+ evaluation_strategy = "steps"
+ eval_steps = 100
+ save_best_model_at_end = true
+ metric_for_best_model = "val.loss"
+ greater_is_better = false
+
+ [hyperparameters]
+ n_epochs = 1
+ train_kind = "peft"
+ per_device_batch = 1
+ gradient_accumulation_steps = 64
+ sequence_length = 4096
+ learning_rate = 5e-6
+ warmup_ratio = 0.03
+ lora_rank = 16
+ lora_alpha = 32
+ lora_dropout = 0.05
+ lora_target_modules = ["all-linear"]
+
+ [hyperparameters.parallelism]
+ use_deepspeed = true
+ deepspeed_stage = 2
+ fsdp = false
+ bf16 = true
+ fp16 = false
+ activation_checkpointing = true
+
+ [tags]
+ experiment = "crafter_sft_lora_qwen_coder_30b"
+ task = "crafter_agent"
+ model_size = "30b"
+
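For reference, the `per_device_batch = 1` and `gradient_accumulation_steps = 64` above reproduce the effective global batch size of 128 quoted in SFT_README.md once two data-parallel workers are assumed; the worker count in the sketch is that assumption, not a value synth-ai computes here:

```python
# Effective global batch size implied by the [hyperparameters] table, assuming
# two data-parallel workers (the TP=2/DP=2 layout described in SFT_README.md).
per_device_batch = 1
gradient_accumulation_steps = 64
data_parallel_workers = 2  # assumption, not read from the config

effective_batch = per_device_batch * gradient_accumulation_steps * data_parallel_workers
print(effective_batch)  # 128
```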