synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (299) hide show
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/baseline/banking77_baseline.py +204 -0
  3. examples/baseline/crafter_baseline.py +407 -0
  4. examples/baseline/pokemon_red_baseline.py +326 -0
  5. examples/baseline/simple_baseline.py +56 -0
  6. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  7. examples/blog_posts/gepa/README.md +355 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  9. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  10. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  13. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  15. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  16. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  18. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  19. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  20. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  21. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  22. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  23. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  24. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  25. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  26. examples/blog_posts/gepa/task_apps.py +105 -0
  27. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  28. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  29. examples/blog_posts/pokemon_vl/README.md +98 -0
  30. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  31. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  32. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  33. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  34. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  35. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  36. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  37. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  38. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  39. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  40. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  41. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  42. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  43. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  44. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  45. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  46. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  47. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  48. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  49. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  50. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  51. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  52. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  53. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  54. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  55. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  56. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  57. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  58. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  59. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  60. examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
  61. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  62. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
  63. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
  64. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  65. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  66. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  67. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  68. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  69. examples/qwen_vl/README.md +10 -12
  70. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  71. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  72. examples/qwen_vl/collect_data_via_cli.md +76 -84
  73. examples/qwen_vl/collect_vision_traces.py +4 -4
  74. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  75. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  76. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  77. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  78. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  79. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  80. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  81. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  82. examples/qwen_vl/run_vision_comparison.sh +6 -7
  83. examples/rl/README.md +5 -5
  84. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  85. examples/rl/configs/rl_from_base_qwen17.toml +6 -2
  86. examples/rl/task_app/README.md +1 -2
  87. examples/rl/task_app/math_single_step.py +2 -2
  88. examples/run_crafter_demo.sh +2 -2
  89. examples/sft/README.md +1 -1
  90. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  91. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  92. examples/swe/task_app/README.md +32 -2
  93. examples/swe/task_app/grpo_swe_mini.py +4 -0
  94. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  95. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  96. examples/swe/task_app/hosted/inference/openai_client.py +4 -38
  97. examples/swe/task_app/hosted/policy_routes.py +17 -0
  98. examples/swe/task_app/hosted/rollout.py +4 -2
  99. examples/swe/task_app/morph_backend.py +178 -0
  100. examples/task_apps/banking77/__init__.py +6 -0
  101. examples/task_apps/banking77/banking77_task_app.py +841 -0
  102. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  103. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  104. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  105. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  106. examples/task_apps/crafter/task_app/README.md +1 -1
  107. examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
  108. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  109. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  110. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  111. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  112. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
  113. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
  114. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
  115. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  116. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  117. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  118. examples/task_apps/gepa_benchmarks/common.py +260 -0
  119. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  120. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  121. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  122. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  123. examples/task_apps/math/README.md +1 -2
  124. examples/task_apps/pokemon_red/README.md +3 -4
  125. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  126. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  127. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  128. examples/task_apps/pokemon_red/task_app.py +288 -39
  129. examples/task_apps/sokoban/README.md +2 -3
  130. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  131. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  132. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  133. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  134. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  135. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
  136. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  137. examples/warming_up_to_rl/task_app/README.md +1 -1
  138. examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
  139. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  147. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  148. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
  149. synth_ai/api/train/builders.py +99 -4
  150. synth_ai/api/train/cli.py +516 -26
  151. synth_ai/api/train/config_finder.py +13 -2
  152. synth_ai/api/train/configs/__init__.py +23 -2
  153. synth_ai/api/train/configs/prompt_learning.py +442 -0
  154. synth_ai/api/train/configs/rl.py +61 -7
  155. synth_ai/api/train/configs/sft.py +6 -2
  156. synth_ai/api/train/configs/shared.py +59 -2
  157. synth_ai/api/train/task_app.py +1 -1
  158. synth_ai/api/train/validators.py +277 -0
  159. synth_ai/auth/credentials.py +119 -0
  160. synth_ai/baseline/__init__.py +25 -0
  161. synth_ai/baseline/config.py +209 -0
  162. synth_ai/baseline/discovery.py +214 -0
  163. synth_ai/baseline/execution.py +146 -0
  164. synth_ai/cli/__init__.py +94 -18
  165. synth_ai/cli/__main__.py +0 -0
  166. synth_ai/cli/claude.py +70 -0
  167. synth_ai/cli/codex.py +84 -0
  168. synth_ai/cli/commands/__init__.py +18 -0
  169. synth_ai/cli/commands/baseline/__init__.py +12 -0
  170. synth_ai/cli/commands/baseline/core.py +637 -0
  171. synth_ai/cli/commands/baseline/list.py +93 -0
  172. synth_ai/cli/commands/demo/__init__.py +6 -0
  173. synth_ai/cli/commands/demo/core.py +163 -0
  174. synth_ai/cli/commands/eval/__init__.py +19 -0
  175. synth_ai/cli/commands/eval/core.py +1112 -0
  176. synth_ai/cli/commands/eval/errors.py +81 -0
  177. synth_ai/cli/commands/eval/validation.py +133 -0
  178. synth_ai/cli/commands/filter/__init__.py +12 -0
  179. synth_ai/cli/commands/filter/core.py +424 -0
  180. synth_ai/cli/commands/filter/errors.py +55 -0
  181. synth_ai/cli/commands/filter/validation.py +77 -0
  182. synth_ai/cli/commands/help/__init__.py +177 -0
  183. synth_ai/cli/commands/help/core.py +72 -0
  184. synth_ai/cli/commands/smoke/__init__.py +7 -0
  185. synth_ai/cli/commands/smoke/core.py +1436 -0
  186. synth_ai/cli/commands/status/__init__.py +64 -0
  187. synth_ai/cli/commands/status/client.py +192 -0
  188. synth_ai/cli/commands/status/config.py +92 -0
  189. synth_ai/cli/commands/status/errors.py +20 -0
  190. synth_ai/cli/commands/status/formatters.py +164 -0
  191. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  192. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  193. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  194. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  195. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  196. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  197. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  198. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  199. synth_ai/cli/commands/status/utils.py +114 -0
  200. synth_ai/cli/commands/train/__init__.py +53 -0
  201. synth_ai/cli/commands/train/core.py +21 -0
  202. synth_ai/cli/commands/train/errors.py +117 -0
  203. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  204. synth_ai/cli/commands/train/judge_validation.py +305 -0
  205. synth_ai/cli/commands/train/validation.py +386 -0
  206. synth_ai/cli/demo.py +30 -158
  207. synth_ai/cli/deploy/__init__.py +43 -0
  208. synth_ai/cli/deploy.py +162 -0
  209. synth_ai/cli/eval/__init__.py +36 -0
  210. synth_ai/cli/eval/core.py +5 -0
  211. synth_ai/cli/eval/errors.py +31 -0
  212. synth_ai/cli/eval/validation.py +5 -0
  213. synth_ai/cli/filter/__init__.py +28 -0
  214. synth_ai/cli/filter/core.py +5 -0
  215. synth_ai/cli/filter/errors.py +23 -0
  216. synth_ai/cli/filter/validation.py +5 -0
  217. synth_ai/cli/legacy_root_backup.py +14 -8
  218. synth_ai/cli/modal_serve/__init__.py +12 -0
  219. synth_ai/cli/modal_serve/core.py +14 -0
  220. synth_ai/cli/modal_serve/errors.py +8 -0
  221. synth_ai/cli/modal_serve/validation.py +11 -0
  222. synth_ai/cli/opencode.py +107 -0
  223. synth_ai/cli/root.py +9 -5
  224. synth_ai/cli/serve/__init__.py +12 -0
  225. synth_ai/cli/serve/core.py +14 -0
  226. synth_ai/cli/serve/errors.py +8 -0
  227. synth_ai/cli/serve/validation.py +11 -0
  228. synth_ai/cli/setup.py +20 -265
  229. synth_ai/cli/status.py +7 -126
  230. synth_ai/cli/task_app_deploy.py +1 -10
  231. synth_ai/cli/task_app_modal_serve.py +4 -9
  232. synth_ai/cli/task_app_serve.py +4 -11
  233. synth_ai/cli/task_apps.py +51 -1480
  234. synth_ai/cli/train/__init__.py +12 -0
  235. synth_ai/cli/train/core.py +21 -0
  236. synth_ai/cli/train/errors.py +8 -0
  237. synth_ai/cli/train/validation.py +24 -0
  238. synth_ai/cli/train.py +1 -14
  239. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  240. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  241. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  242. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  243. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  244. synth_ai/environments/examples/red/engine.py +33 -12
  245. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  246. synth_ai/environments/examples/red/environment.py +26 -0
  247. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  248. synth_ai/http.py +12 -0
  249. synth_ai/judge_schemas.py +10 -10
  250. synth_ai/learning/__init__.py +10 -0
  251. synth_ai/learning/prompt_learning_client.py +276 -0
  252. synth_ai/learning/prompt_learning_types.py +184 -0
  253. synth_ai/learning/rl/client.py +3 -1
  254. synth_ai/pricing/__init__.py +2 -0
  255. synth_ai/pricing/model_pricing.py +57 -0
  256. synth_ai/streaming/__init__.py +29 -0
  257. synth_ai/streaming/config.py +94 -0
  258. synth_ai/streaming/handlers.py +518 -0
  259. synth_ai/streaming/streamer.py +320 -0
  260. synth_ai/streaming/types.py +95 -0
  261. synth_ai/task/apps/__init__.py +1 -0
  262. synth_ai/task/config.py +2 -0
  263. synth_ai/task/tracing_utils.py +25 -25
  264. synth_ai/task/validators.py +45 -9
  265. synth_ai/task_app_cfgs.py +21 -0
  266. synth_ai/tracing_v3/config.py +162 -19
  267. synth_ai/tracing_v3/constants.py +1 -1
  268. synth_ai/tracing_v3/db_config.py +24 -38
  269. synth_ai/tracing_v3/migration_helper.py +1 -2
  270. synth_ai/tracing_v3/storage/config.py +47 -13
  271. synth_ai/tracing_v3/storage/factory.py +3 -3
  272. synth_ai/tracing_v3/turso/daemon.py +113 -11
  273. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  274. synth_ai/types.py +8 -0
  275. synth_ai/urls.py +11 -0
  276. synth_ai/utils/__init__.py +30 -1
  277. synth_ai/utils/agents.py +74 -0
  278. synth_ai/utils/bin.py +39 -0
  279. synth_ai/utils/cli.py +149 -5
  280. synth_ai/utils/env.py +40 -33
  281. synth_ai/utils/http.py +4 -1
  282. synth_ai/utils/json.py +72 -0
  283. synth_ai/utils/modal.py +285 -3
  284. synth_ai/utils/paths.py +48 -0
  285. synth_ai/utils/uvicorn.py +113 -0
  286. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
  287. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
  288. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  289. synth_ai/cli/tui.py +0 -62
  290. synth_ai/tui/__init__.py +0 -5
  291. synth_ai/tui/__main__.py +0 -13
  292. synth_ai/tui/cli/__init__.py +0 -1
  293. synth_ai/tui/cli/query_experiments.py +0 -164
  294. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  295. synth_ai/tui/dashboard.py +0 -911
  296. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  297. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  298. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  299. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,25 @@
1
+ [eval]
2
+ app_id = "grpo-crafter"
3
+ task_app_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
4
+ model = "Qwen/Qwen3-4B"
5
+ seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
6
+ max_turns = 20
7
+ concurrency = 1
8
+ env_name = "crafter"
9
+ policy_name = "crafter-react"
10
+ trace_format = "structured"
11
+ return_trace = true
12
+
13
+ [eval.policy_config]
14
+ provider = "synth"
15
+ model = "Qwen/Qwen3-4B"
16
+ inference_url = "https://synth-laboratories-dev--learning-v2-service-fastapi-app.modal.run"
17
+ temperature = 0.6
18
+ top_p = 0.95
19
+ max_tokens = 2048
20
+ use_vision = false
21
+ image_only_mode = false
22
+ max_llm_calls = 10
23
+
24
+ [eval.env_config.env_params]
25
+ max_steps_per_episode = 20
@@ -0,0 +1,25 @@
1
+ # Evaluate the finetuned Qwen3-4B checkpoint on Crafter.
2
+ # Replace both `model` values below ([eval] and [eval.policy_config]) with the fft: job id returned by the SFT run.
3
+
4
+ [eval]
5
+ app_id = "grpo-crafter"
6
+ task_app_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
7
+ model = "fft:REPLACE-WITH-SFT-JOB-ID"
8
+ seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
9
+ max_turns = 10
10
+ concurrency = 4
11
+ env_name = "crafter"
12
+ policy_name = "crafter-react"
13
+ trace_format = "compact"
14
+ return_trace = false
15
+
16
+ [eval.policy_config]
17
+ provider = "synth"
18
+ model = "fft:REPLACE-WITH-SFT-JOB-ID"
19
+ temperature = 0.2
20
+ top_p = 0.8
21
+ max_tokens = 512
22
+ use_vision = true
23
+ image_only_mode = false
24
+ max_llm_calls = 10
25
+ tool_choice = "auto"
@@ -0,0 +1,26 @@
1
+ [eval]
2
+ app_id = "grpo-crafter"
3
+ task_app_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
4
+ model = "peft:Qwen/Qwen3-4B:job_f774218e6c954517"
5
+ seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
6
+ max_turns = 20
7
+ concurrency = 2
8
+ env_name = "crafter"
9
+ policy_name = "crafter-react"
10
+ trace_format = "structured"
11
+ return_trace = true
12
+
13
+ [eval.policy_config]
14
+ provider = "synth"
15
+ model = "peft:Qwen/Qwen3-4B:job_f774218e6c954517"
16
+ inference_url = "https://synth-laboratories-dev--learning-v2-service-fastapi-app.modal.run"
17
+ temperature = 0.2
18
+ top_p = 0.8
19
+ max_tokens = 1024
20
+ use_vision = false
21
+ image_only_mode = false
22
+ max_llm_calls = 10
23
+ tool_choice = "auto"
24
+
25
+ [eval.env_config.env_params]
26
+ max_steps_per_episode = 20
@@ -0,0 +1,25 @@
1
+ [eval]
2
+ app_id = "grpo-crafter"
3
+ task_app_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
4
+ model = "qwen/qwen3-32b"
5
+ seeds = [ 0, 1, 2,]
6
+ max_turns = 10
7
+ concurrency = 1
8
+ env_name = "crafter"
9
+ policy_name = "crafter-react"
10
+ trace_format = "full"
11
+ return_trace = true
12
+
13
+ [eval.policy_config]
14
+ provider = "groq"
15
+ model = "qwen/qwen3-32b"
16
+ inference_url = "https://api.groq.com/openai"
17
+ temperature = 0.6
18
+ top_p = 0.95
19
+ max_tokens = 8192
20
+ use_vision = false
21
+ image_only_mode = false
22
+ max_llm_calls = 10
23
+
24
+ [eval.env_config.env_params]
25
+ max_steps_per_episode = 10
@@ -0,0 +1,29 @@
1
+ # Crafter rollout config for GPT-OSS-120B served from OpenAI-compatible APIs.
2
+ # Replace the task_app_url with your deployed Crafter task app URL.
3
+ # The run stores full traces so we can keep the LLM reasoning for fine-tuning.
4
+
5
+ [eval]
6
+ app_id = "grpo-crafter"
7
+ task_app_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
8
+ model = "openai/gpt-oss-120b"
9
+ seeds = [0, 1, 2]
10
+ max_turns = 10
11
+ concurrency = 1
12
+ env_name = "crafter"
13
+ policy_name = "crafter-react"
14
+ trace_format = "full"
15
+ return_trace = true
16
+
17
+ [eval.env_config]
18
+ env_params = { max_steps_per_episode = 10 }
19
+
20
+ [eval.policy_config]
21
+ provider = "groq"
22
+ model = "openai/gpt-oss-120b"
23
+ inference_url = "https://api.groq.com/openai"
24
+ temperature = 0.6
25
+ top_p = 0.9
26
+ max_tokens = 768
27
+ use_vision = false
28
+ image_only_mode = false
29
+ max_llm_calls = 10
@@ -0,0 +1,10 @@
1
+ # Filters Crafter traces into an instruction-tuning dataset.
2
+ # Assumes the rollouts from `uvx synth-ai eval` (e.g. traces/v3/crafter_blog.db) are reachable through the database URL set in `db` below.
3
+
4
+ [filter]
5
+ db = "sqlite+libsql://http://127.0.0.1:8080"
6
+ output = "examples/blog_posts/warming_up_to_rl/ft_data/crafter_blog_high_reward.jsonl"
7
+ min_official_score = 0.1
8
+ models = ["qwen/qwen3-32b", "openai/gpt-oss-120b"]
9
+ shuffle = true
10
+ shuffle_seed = 42
@@ -0,0 +1,75 @@
1
+ # Example RL config with smoke testing enabled
2
+ # This config demonstrates auto-starting task app and sqld for easy smoke testing
3
+
4
+ type = "rl"
5
+
6
+ # Smoke testing configuration - AUTO-STARTS services in background!
7
+ [smoke]
8
+ # Auto-start the task app server
9
+ task_app_name = "grpo-crafter" # Your task app name (use "synth-ai task-app list" to see available apps)
10
+ task_app_port = 8765
11
+ task_app_env_file = ".env" # Required for this task app
12
+ task_app_force = true # Kill any existing process on this port
13
+
14
+ # Auto-start sqld for tracing
15
+ sqld_auto_start = true
16
+ sqld_db_path = "./traces/local.db"
17
+ sqld_hrana_port = 8080
18
+ sqld_http_port = 8081
19
+
20
+ # Test parameters
21
+ env_name = "crafter"
22
+ policy_name = "crafter-react"
23
+ max_steps = 10
24
+ policy = "gpt-5-nano" # Use gpt-5-nano policy with mock backend
25
+ model = "gpt-4o-mini" # Real model to use via OpenAI
26
+ mock_backend = "openai" # Use OpenAI backend for real inference and tool calls
27
+ return_trace = true
28
+ use_mock = true # Use mock proxy that routes to OpenAI
29
+
30
+ # RL Training Configuration (used by actual training, not smoke tests)
31
+ [algorithm]
32
+ type = "online"
33
+ method = "policy_gradient"
34
+ variety = "gspo"
35
+
36
+ [policy]
37
+ model_name = "Qwen/Qwen3-4B"
38
+ trainer_mode = "full"
39
+ label = "crafter-rl-demo"
40
+
41
+ [compute]
42
+ gpu_type = "H100"
43
+ gpu_count = 2
44
+
45
+ [compute.topology]
46
+ type = "single_node_split"
47
+ gpus_for_vllm = 1
48
+ gpus_for_training = 1
49
+
50
+ [services]
51
+ task_url = "http://localhost:8765"
52
+
53
+ [rollout]
54
+ env_name = "crafter"
55
+ policy_name = "crafter-react"
56
+ max_turns = 10
57
+ episodes_per_batch = 16
58
+ max_concurrent_rollouts = 4
59
+ task_app_origin_rewards_only = true
60
+
61
+ [training]
62
+ num_epochs = 1
63
+ iterations_per_epoch = 10
64
+ max_turns = 10
65
+ batch_size = 4
66
+ group_size = 4
67
+ learning_rate = 5e-5
68
+ weight_sync_interval = 1
69
+ log_interval = 1
70
+
71
+ [evaluation]
72
+ instances = 2
73
+ every_n_iters = 1
74
+ seeds = [0, 1]
75
+
@@ -0,0 +1,91 @@
1
+ # Continue training the finetuned Crafter policy with GRPO-style RL.
2
+ # Fill in task_url with your deployed task app and set model.source (the [model] table
3
+ # below ships with `base` instead) to the finetuned model id returned by `uvx synth-ai train --type sft`.
4
+
5
+ type = "rl"
6
+
7
+ # [smoke] section is OPTIONAL and only used by `synth-ai smoke` command for local testing.
8
+ # This section is completely IGNORED by the RL trainer and will not affect training jobs.
9
+ # It allows you to quickly test your task app without passing many CLI arguments:
10
+ # uvx synth-ai smoke --config this-file.toml
11
+ # All values are optional; CLI args override TOML values.
12
+ [smoke]
13
+ task_url = "https://synth-laboratories--crafter-blogpost-fastapi-app-dev.modal.run"
14
+ env_name = "crafter"
15
+ policy_name = "crafter-react"
16
+ max_steps = 10
17
+ policy = "mock" # mock, gpt-5-nano, openai, groq
18
+ model = "gpt-5-nano"
19
+ mock_backend = "openai" # synthetic or openai
20
+ mock_port = 0 # 0 = auto-assign
21
+ return_trace = true
22
+ use_mock = true
23
+
24
+ [algorithm]
25
+ type = "online"
26
+ method = "policy_gradient"
27
+ variety = "gspo"
28
+
29
+ [services]
30
+ task_url = "https://synth-laboratories--crafter-blogpost-fastapi-app-dev.modal.run"
31
+ judge_url = "https://synth-backend-dev-docker.onrender.com/api"
32
+
33
+ [compute]
34
+ gpu_type = "H200"
35
+ gpu_count = 2
36
+ [compute.topology]
37
+ reference_placement = "none"
38
+
39
+ [topology]
40
+ type = "single_node_split"
41
+ reference_placement = "none"
42
+ gpus_for_vllm = 1
43
+ gpus_for_training = 1
44
+ gpus_for_ref = 0
45
+ tensor_parallel = 1
46
+
47
+ [vllm]
48
+ tensor_parallel_size = 1
49
+ max_model_len = 8192
50
+
51
+ [reference]
52
+ placement = "none"
53
+
54
+ [model]
55
+ base = "Qwen/Qwen3-4B"
56
+ trainer_mode = "lora"
57
+ label = "crafter-rl-baseline"
58
+
59
+ [rollout]
60
+ env_name = "crafter"
61
+ policy_name = "crafter-react"
62
+ max_turns = 10
63
+ episodes_per_batch = 20
64
+ max_concurrent_rollouts = 8
65
+ rubric_rewards_only = false
66
+ task_app_origin_rewards_only = true
67
+
68
+ [evaluation]
69
+ instances = 100
70
+ every_n_iters = 20
71
+ seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]
72
+
73
+ [training]
74
+ num_epochs = 1
75
+ iterations_per_epoch = 1
76
+ max_turns = 10
77
+ batch_size = 2
78
+ group_size = 2
79
+ learning_rate = 5e-6
80
+ weight_sync_interval = 1
81
+ log_interval = 1
82
+ max_completion_tokens = 256
83
+ async_semaphore_max = 4
84
+
85
+ [training.weight_sync]
86
+ enable = true
87
+ targets = ["policy"]
88
+ weight_sync_interval = 1
89
+
90
+ [rubric]
91
+ enabled = false
@@ -0,0 +1,40 @@
1
+ # Full-finetune Qwen3-4B on filtered Crafter traces.
2
+ # Update the `data` path once `uvx synth-ai filter` produces your JSONL.
3
+
4
+ [algorithm]
5
+ type = "offline"
6
+ method = "sft"
7
+ variety = "fft"
8
+
9
+ [job]
10
+ model = "Qwen/Qwen3-4B"
11
+ data = "examples/blog_posts/warming_up_to_rl/ft_data/crafter_blog_high_reward.jsonl"
12
+ poll_seconds = 1800
13
+
14
+ [compute]
15
+ gpu_type = "H100"
16
+ gpu_count = 4
17
+ nodes = 1
18
+
19
+ [data.topology]
20
+ container_count = 4
21
+
22
+ [training]
23
+ mode = "full_finetune"
24
+ use_qlora = false
25
+
26
+ [hyperparameters]
27
+ n_epochs = 2
28
+ world_size = 4
29
+ sequence_length = 2048
30
+ per_device_batch = 2
31
+ gradient_accumulation_steps = 64
32
+ learning_rate = 8e-6
33
+ warmup_ratio = 0.03
34
+
35
+ [hyperparameters.parallelism]
36
+ use_deepspeed = true
37
+ deepspeed_stage = 3
38
+ fsdp = false
39
+ bf16 = true
40
+ fp16 = false
@@ -0,0 +1,187 @@
1
+ """Warming Up to RL baseline for Crafter.
2
+
3
+ This baseline demonstrates how to evaluate an LLM agent on the Crafter survival game
4
+ without requiring a deployed task app. This is the recommended starting point for coding
5
+ agents to get a baseline score before making changes.
6
+
7
+ Quick Start:
8
+ # Run a quick 3-task baseline
9
+ uvx synth-ai baseline warming_up_to_rl --split train --seeds 0,1,2
10
+
11
+ # Full train evaluation
12
+ uvx synth-ai baseline warming_up_to_rl --split train
13
+
14
+ # Compare models
15
+ uvx synth-ai baseline warming_up_to_rl --model groq:openai/gpt-oss-20b
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ from typing import Any
22
+
23
+ try:
24
+ import crafter
25
+ CRAFTER_AVAILABLE = True
26
+ except ImportError:
27
+ CRAFTER_AVAILABLE = False
28
+
29
+ from synth_ai.baseline import BaselineConfig, BaselineTaskRunner, DataSplit, TaskResult
30
+ from synth_ai.types import EventReward, OutcomeReward
31
+
32
+
class CrafterRunner(BaselineTaskRunner):
    """Task runner that plays Crafter episodes with an LLM-chosen action per step.

    One task is one episode: at every step the model receives a short text
    prompt, answers through the ``take_action`` tool, and the chosen action is
    applied to the environment. The episode is scored from the achievements
    the environment reports in ``info``.
    """

    # Action names offered to the model. The position of a name in this list
    # is the integer handed to ``env.step``.
    # NOTE(review): assumes this order matches Crafter's action indices - confirm.
    _ACTIONS = [
        "noop", "move_left", "move_right", "move_up", "move_down",
        "do", "sleep", "place_stone", "place_table", "place_furnace",
        "place_plant", "make_wood_pickaxe", "make_stone_pickaxe",
        "make_iron_pickaxe", "make_wood_sword", "make_stone_sword",
        "make_iron_sword",
    ]

    def __init__(self, policy_config: dict[str, Any], env_config: dict[str, Any]):
        """Store the configs and read the per-episode step cap.

        Args:
            policy_config: Policy settings; ``model`` and ``temperature`` are
                read in :meth:`run_task`.
            env_config: Environment settings; ``max_steps`` (default 1000)
                bounds the number of environment steps per episode.
        """
        super().__init__(policy_config, env_config)
        self.max_steps = env_config.get("max_steps", 1000)

    def _action_tool(self) -> dict[str, Any]:
        """Return the function-calling schema for the ``take_action`` tool."""
        actions = self._ACTIONS
        return {
            "type": "function",
            "function": {
                "name": "take_action",
                "description": "Take an action in the Crafter world",
                "parameters": {
                    "type": "object",
                    "properties": {
                        "action": {
                            "type": "string",
                            "enum": actions,
                            "description": f"Action to take. Available: {', '.join(actions)}",
                        }
                    },
                    "required": ["action"],
                },
            },
        }

    async def run_task(self, seed: int) -> TaskResult:
        """Run a single Crafter episode for ``seed`` and score it.

        Args:
            seed: Seed for the Crafter world, so that a given seed from a data
                split always maps to the same generated world.

        Returns:
            A ``TaskResult`` whose outcome reward is the sum of the achievement
            values reported by the environment. If the episode was cut short
            by an exception, ``metadata["error"]`` holds its description.

        Raises:
            ImportError: If the ``crafter`` package is not installed.
        """
        if not CRAFTER_AVAILABLE:
            raise ImportError(
                "Crafter not installed. Install with: pip install crafter"
            )

        # Seed the environment: without this every task gets an arbitrary
        # world and the ``seed`` argument has no effect on the episode.
        env = crafter.Env(seed=seed)
        env.reset()

        # Episode bookkeeping.
        event_rewards: list[EventReward] = []
        # presumably a mapping of achievement name -> count; verify against Crafter
        achievements: dict[str, Any] = {}
        step_count = 0
        episode_error: str | None = None

        # Imported lazily so that importing this module does not require the
        # inference client.
        from synth_ai.inference.client import InferenceClient

        client = InferenceClient()
        model = self.policy_config.get("model", "gpt-4o-mini")
        temperature = self.policy_config.get("temperature", 0.7)

        actions = self._ACTIONS
        action_tool = self._action_tool()

        done = False
        while not done and step_count < self.max_steps:
            # Text-only observation (would include visual state in a full implementation).
            obs_str = f"Crafter Step {step_count}\n"
            obs_str += f"Current achievements: {achievements}\n"
            obs_str += "What action should you take to survive and progress?"

            try:
                response = await client.generate(
                    model=model,
                    messages=[
                        {
                            "role": "system",
                            "content": "You are an expert at survival games. Use the take_action tool to survive and achieve goals in Crafter.",
                        },
                        {"role": "user", "content": obs_str},
                    ],
                    tools=[action_tool],
                    temperature=temperature,
                    max_tokens=100,
                )

                # Fall back to "noop" when the model made no tool call or
                # named an action that is not in the list.
                action_name = "noop"
                if response.get("tool_calls"):
                    tool_call = response["tool_calls"][0]
                    args = json.loads(tool_call["function"]["arguments"])
                    action_name = args.get("action", "noop")

                action_idx = actions.index(action_name) if action_name in actions else 0

                _obs, reward, done, info = env.step(action_idx)

                if "achievements" in info:
                    achievements.update(info["achievements"])

                # Only positive step rewards are recorded as events.
                if reward > 0:
                    event_rewards.append(
                        EventReward(
                            event_id=f"step_{step_count}",
                            reward=reward,
                            metadata={"action": action_name, "achievements": achievements.copy()},
                        )
                    )

                step_count += 1

            except Exception as exc:
                # Best effort: keep the partial episode, but record why it
                # stopped instead of discarding the failure silently.
                episode_error = f"{type(exc).__name__}: {exc}"
                break

        # Outcome reward is derived from the accumulated achievement values.
        total_achievements = sum(achievements.values())
        success = total_achievements >= 3  # At least 3 achievements

        result_metadata: dict[str, Any] = {"achievements": achievements}
        if episode_error is not None:
            result_metadata["error"] = episode_error

        return TaskResult(
            success=success,
            outcome_reward=OutcomeReward(
                reward=float(total_achievements),
                metadata={
                    "steps": step_count,
                    "achievements": achievements,
                    "seed": seed,
                },
            ),
            event_rewards=event_rewards,
            total_steps=step_count,
            metadata=result_metadata,
        )
165
+
166
+
# The baseline is only defined when the optional ``crafter`` import succeeded,
# so importing this module without Crafter installed defines no baseline.
if CRAFTER_AVAILABLE:
    warming_up_to_rl_baseline = BaselineConfig(
        baseline_id="warming_up_to_rl",
        name="Warming Up to RL - Crafter",
        description="Crafter survival game baseline for comparing agent performance on RL tasks",
        task_runner=CrafterRunner,
        # Seeds 0-19 form train, 20-24 val, 25-29 test.
        splits={
            split_name: DataSplit(name=split_name, seeds=list(seed_range))
            for split_name, seed_range in (
                ("train", range(20)),
                ("val", range(20, 25)),
                ("test", range(25, 30)),
            )
        },
        default_policy_config={"model": "gpt-4o-mini", "temperature": 0.7},
        default_env_config={"max_steps": 1000},
        tags=["rl", "survival", "achievements", "blog-post"],
    )
@@ -1,3 +1,8 @@
1
+ [algorithm]
2
+ type = "offline"
3
+ method = "sft"
4
+ variety = "qlora"
5
+
1
6
  [job]
2
7
  model = "Qwen/Qwen3-32B"
3
8
  # Optionally set here; you can also pass --dataset
@@ -88,3 +88,7 @@ Expected output for successful rollout:
88
88
  - `mean_return` ≈ 1.0+ (if full submit success)
89
89
 
90
90
 
91
+
92
+
93
+
94
+
@@ -181,3 +181,7 @@ Before starting RL training, verify:
181
181
  - [verilog_rl_lora.toml](./verilog_rl_lora.toml) - Training configuration
182
182
 
183
183
 
184
+
185
+
186
+
187
+
@@ -6,7 +6,7 @@ method = "policy_gradient"
6
6
  variety = "gspo"
7
7
 
8
8
  [services]
9
- # Replace with the Modal URL printed by `uvx synth-ai modal-serve grpo-crafter`
9
+ # Replace with the Modal URL printed by `uvx synth-ai deploy --runtime modal --modal-mode serve grpo-crafter`
10
10
  task_url = "https://YOUR-MODAL-TASK-APP.modal.run"
11
11
 
12
12
  [compute]
@@ -46,6 +46,7 @@ policy_name = "crafter-react"
46
46
  max_concurrent_rollouts = 12
47
47
  batches_per_step = 2
48
48
  ops = ["agent", "env"]
49
+ task_app_origin_rewards_only = true
49
50
 
50
51
  [evaluation]
51
52
  instances = 10