synth-ai 0.2.16 → 0.2.17 (py3-none-any.whl)

This diff compares publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.

This version of synth-ai might be problematic.

Files changed (192)
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/blog_posts/pokemon_vl/README.md +98 -0
  3. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
  4. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  5. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  6. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
  7. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  8. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  9. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  10. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  11. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  12. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  13. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
  14. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  15. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  16. examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
  17. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  18. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
  19. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
  20. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  21. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  22. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  23. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  24. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  25. examples/qwen_vl/README.md +10 -12
  26. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  27. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  28. examples/qwen_vl/collect_data_via_cli.md +76 -84
  29. examples/qwen_vl/collect_vision_traces.py +4 -4
  30. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  31. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  32. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  33. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  34. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  35. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  36. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  37. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  38. examples/qwen_vl/run_vision_comparison.sh +6 -7
  39. examples/rl/README.md +5 -5
  40. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  41. examples/rl/configs/rl_from_base_qwen17.toml +5 -2
  42. examples/rl/task_app/README.md +1 -2
  43. examples/rl/task_app/math_single_step.py +2 -2
  44. examples/run_crafter_demo.sh +2 -2
  45. examples/sft/README.md +1 -1
  46. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  47. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  48. examples/swe/task_app/README.md +32 -2
  49. examples/swe/task_app/grpo_swe_mini.py +4 -0
  50. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  51. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  52. examples/swe/task_app/hosted/inference/openai_client.py +4 -4
  53. examples/swe/task_app/morph_backend.py +178 -0
  54. examples/task_apps/crafter/task_app/README.md +1 -1
  55. examples/task_apps/crafter/task_app/grpo_crafter.py +66 -3
  56. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  57. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  58. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  59. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +17 -49
  60. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +13 -5
  61. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +15 -1
  62. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  63. examples/task_apps/math/README.md +1 -2
  64. examples/task_apps/pokemon_red/README.md +3 -4
  65. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  66. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  67. examples/task_apps/pokemon_red/task_app.py +36 -5
  68. examples/task_apps/sokoban/README.md +2 -3
  69. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  70. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  71. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  72. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  73. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  74. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -2
  75. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  76. examples/warming_up_to_rl/task_app/README.md +1 -1
  77. examples/warming_up_to_rl/task_app/grpo_crafter.py +134 -3
  78. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +4 -4
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +6 -3
  83. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  84. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
  85. synth_ai/api/train/builders.py +9 -3
  86. synth_ai/api/train/cli.py +125 -10
  87. synth_ai/api/train/configs/__init__.py +8 -1
  88. synth_ai/api/train/configs/rl.py +32 -7
  89. synth_ai/api/train/configs/sft.py +6 -2
  90. synth_ai/api/train/configs/shared.py +59 -2
  91. synth_ai/auth/credentials.py +119 -0
  92. synth_ai/cli/__init__.py +12 -4
  93. synth_ai/cli/commands/__init__.py +17 -0
  94. synth_ai/cli/commands/demo/__init__.py +6 -0
  95. synth_ai/cli/commands/demo/core.py +163 -0
  96. synth_ai/cli/commands/deploy/__init__.py +23 -0
  97. synth_ai/cli/commands/deploy/core.py +614 -0
  98. synth_ai/cli/commands/deploy/errors.py +72 -0
  99. synth_ai/cli/commands/deploy/validation.py +11 -0
  100. synth_ai/cli/commands/eval/__init__.py +19 -0
  101. synth_ai/cli/commands/eval/core.py +1109 -0
  102. synth_ai/cli/commands/eval/errors.py +81 -0
  103. synth_ai/cli/commands/eval/validation.py +133 -0
  104. synth_ai/cli/commands/filter/__init__.py +12 -0
  105. synth_ai/cli/commands/filter/core.py +388 -0
  106. synth_ai/cli/commands/filter/errors.py +55 -0
  107. synth_ai/cli/commands/filter/validation.py +77 -0
  108. synth_ai/cli/commands/help/__init__.py +177 -0
  109. synth_ai/cli/commands/help/core.py +73 -0
  110. synth_ai/cli/commands/status/__init__.py +64 -0
  111. synth_ai/cli/commands/status/client.py +192 -0
  112. synth_ai/cli/commands/status/config.py +92 -0
  113. synth_ai/cli/commands/status/errors.py +20 -0
  114. synth_ai/cli/commands/status/formatters.py +164 -0
  115. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  116. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  117. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  118. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  119. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  120. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  121. synth_ai/cli/commands/status/utils.py +114 -0
  122. synth_ai/cli/commands/train/__init__.py +53 -0
  123. synth_ai/cli/commands/train/core.py +21 -0
  124. synth_ai/cli/commands/train/errors.py +117 -0
  125. synth_ai/cli/commands/train/judge_schemas.py +199 -0
  126. synth_ai/cli/commands/train/judge_validation.py +304 -0
  127. synth_ai/cli/commands/train/validation.py +443 -0
  128. synth_ai/cli/demo.py +2 -162
  129. synth_ai/cli/deploy/__init__.py +28 -0
  130. synth_ai/cli/deploy/core.py +5 -0
  131. synth_ai/cli/deploy/errors.py +23 -0
  132. synth_ai/cli/deploy/validation.py +5 -0
  133. synth_ai/cli/eval/__init__.py +36 -0
  134. synth_ai/cli/eval/core.py +5 -0
  135. synth_ai/cli/eval/errors.py +31 -0
  136. synth_ai/cli/eval/validation.py +5 -0
  137. synth_ai/cli/filter/__init__.py +28 -0
  138. synth_ai/cli/filter/core.py +5 -0
  139. synth_ai/cli/filter/errors.py +23 -0
  140. synth_ai/cli/filter/validation.py +5 -0
  141. synth_ai/cli/modal_serve/__init__.py +12 -0
  142. synth_ai/cli/modal_serve/core.py +14 -0
  143. synth_ai/cli/modal_serve/errors.py +8 -0
  144. synth_ai/cli/modal_serve/validation.py +11 -0
  145. synth_ai/cli/serve/__init__.py +12 -0
  146. synth_ai/cli/serve/core.py +14 -0
  147. synth_ai/cli/serve/errors.py +8 -0
  148. synth_ai/cli/serve/validation.py +11 -0
  149. synth_ai/cli/setup.py +20 -265
  150. synth_ai/cli/status.py +7 -126
  151. synth_ai/cli/task_app_deploy.py +1 -10
  152. synth_ai/cli/task_app_modal_serve.py +4 -9
  153. synth_ai/cli/task_app_serve.py +4 -11
  154. synth_ai/cli/task_apps.py +58 -1487
  155. synth_ai/cli/train/__init__.py +12 -0
  156. synth_ai/cli/train/core.py +21 -0
  157. synth_ai/cli/train/errors.py +8 -0
  158. synth_ai/cli/train/validation.py +24 -0
  159. synth_ai/cli/train.py +1 -14
  160. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  161. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  162. synth_ai/environments/examples/red/engine.py +33 -12
  163. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  164. synth_ai/environments/examples/red/environment.py +26 -0
  165. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  166. synth_ai/http.py +12 -0
  167. synth_ai/judge_schemas.py +10 -11
  168. synth_ai/learning/rl/client.py +3 -1
  169. synth_ai/streaming/__init__.py +29 -0
  170. synth_ai/streaming/config.py +94 -0
  171. synth_ai/streaming/handlers.py +469 -0
  172. synth_ai/streaming/streamer.py +301 -0
  173. synth_ai/streaming/types.py +95 -0
  174. synth_ai/task/validators.py +2 -2
  175. synth_ai/tracing_v3/migration_helper.py +1 -2
  176. synth_ai/utils/env.py +25 -18
  177. synth_ai/utils/http.py +4 -1
  178. synth_ai/utils/modal.py +2 -2
  179. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/METADATA +8 -3
  180. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/RECORD +184 -109
  181. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  182. synth_ai/cli/tui.py +0 -62
  183. synth_ai/tui/__init__.py +0 -5
  184. synth_ai/tui/__main__.py +0 -13
  185. synth_ai/tui/cli/__init__.py +0 -1
  186. synth_ai/tui/cli/query_experiments.py +0 -164
  187. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  188. synth_ai/tui/dashboard.py +0 -911
  189. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
  190. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
  191. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
  192. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
examples/qwen_vl/README.md

@@ -55,21 +55,21 @@ uvx synth-ai train --type sft --config configs/vision_sft/crafter_qwen3vl_8b_gpt
 Run Crafter agent using Qwen-VL models via synth-ai's hosted inference.
 
 **Models supported:**
-- `Qwen/Qwen2-VL-7B-Instruct`
-- `Qwen/Qwen2-VL-2B-Instruct`
-- `Qwen/Qwen3-VL-8B` (or any Qwen VL variant)
+- `Qwen/Qwen3-VL-2B-Instruct`
+- `Qwen/Qwen3-VL-4B-Instruct`
+- `Qwen/Qwen3-VL-8B-Instruct` (or any Qwen3 VL variant)
 
 **Usage:**
 ```bash
-# Run with Qwen2-VL-7B
+# Run with Qwen3-VL-4B
 uv run python examples/qwen_vl/crafter_qwen_vl_agent.py \
-  --model Qwen/Qwen2-VL-7B-Instruct \
+  --model Qwen/Qwen3-VL-4B-Instruct \
   --seeds 10 \
   --steps 20
 
 # Run with Qwen3-VL-8B
 uv run python examples/qwen_vl/crafter_qwen_vl_agent.py \
-  --model Qwen/Qwen3-VL-8B \
+  --model Qwen/Qwen3-VL-8B-Instruct \
   --seeds 10 \
   --steps 20
 ```
@@ -113,13 +113,13 @@ uv run python examples/qwen_vl/collect_vision_traces.py \
   --max-steps 50 \
   --output-dir traces/gpt5nano_vision
 
-# Collect traces with Qwen2-VL via synth
+# Collect traces with Qwen3-VL via synth
 uv run python examples/qwen_vl/collect_vision_traces.py \
-  --model Qwen/Qwen2-VL-7B-Instruct \
+  --model Qwen/Qwen3-VL-8B-Instruct \
   --provider synth \
   --episodes 100 \
   --max-steps 50 \
-  --output-dir traces/qwen2vl_vision
+  --output-dir traces/qwen3vl_vision
 ```
 
 **Output:** SQLite database with multimodal traces ready for SFT export.
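The collector's output is a SQLite trace database. As a quick sanity check before filtering, a sketch like the following can list what the tracer actually wrote; the table layout is not documented in this diff, so treat the schema as unknown until inspected:

```python
import sqlite3

# Path from the --output-dir example above; adjust to your run.
conn = sqlite3.connect("traces/qwen3vl_vision/rollouts.db")

# Enumerate whatever tables the tracer created (schema not guaranteed here).
tables = [row[0] for row in conn.execute(
    "SELECT name FROM sqlite_master WHERE type = 'table'"
)]
print("tables:", tables)
conn.close()
```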
@@ -132,8 +132,7 @@ CrafterPolicy automatically detects vision capability from model names:
 - ✅ `gpt-5*` → Vision enabled
 - ✅ `gpt-4o*` → Vision enabled
 - ✅ `*qwen-vl*` → Vision enabled
-- ✅ `*qwen2-vl*` → Vision enabled
-- ✅ `qwen3-vl*` → Vision enabled
+- ✅ `*qwen3-vl*` → Vision enabled
 
 Or set explicitly: `policy.use_vision = True`
 
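The wildcard list above implies simple name matching. An illustrative sketch of that logic, assuming plain substring matching; the actual CrafterPolicy implementation is not shown in this diff and may differ:

```python
# Patterns mirroring the README's list (gpt-5*, gpt-4o*, *qwen-vl*, *qwen3-vl*).
# Hypothetical helper; CrafterPolicy's real matching may differ.
VISION_MODEL_PATTERNS = ("gpt-5", "gpt-4o", "qwen-vl", "qwen3-vl")

def model_supports_vision(model_name: str) -> bool:
    name = model_name.lower()
    return any(pattern in name for pattern in VISION_MODEL_PATTERNS)

assert model_supports_vision("Qwen/Qwen3-VL-8B-Instruct")
assert not model_supports_vision("Qwen/Qwen3-8B")
```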
@@ -151,4 +150,3 @@ Crafter environment provides observations as:
 3. Export to SFT JSONL format (see `vision_sft_rl.txt`)
 4. Train VLM with LoRA (see monorepo SFT configs)
 5. Fine-tune with RL/GRPO
-
examples/qwen_vl/SETUP_COMPLETE.md

@@ -15,10 +15,10 @@ Complete vision-language model (VLM) infrastructure for Crafter with image obser
 
 ### **Configuration Files**
 6. **`configs/eval_gpt5nano_vision.toml`** - Eval config for gpt-5-nano
-7. **`configs/eval_qwen2vl_vision.toml`** - Eval config for Qwen2-VL
+7. **`configs/eval_qwen3vl_vision.toml`** - Eval config for Qwen3-VL
 8. **`configs/eval_gpt4o_mini_vision.toml`** - Eval config for gpt-4o-mini (stronger teacher)
 9. **`configs/filter_vision_sft.toml`** - Filter config for gpt-5-nano traces
-10. **`configs/filter_qwen2vl_sft.toml`** - Filter config for Qwen2-VL traces
+10. **`configs/filter_qwen3vl_sft.toml`** - Filter config for Qwen3-VL traces
 11. **`configs/crafter_vlm_sft_example.toml`** - Example SFT training config
 
 ### **Documentation**
@@ -81,7 +81,7 @@ uv run python examples/qwen_vl/crafter_gpt5nano_agent.py --seeds 5 --steps 10
 - Stores traces to SQLite with base64-encoded images
 - Supports parallel episodes for faster collection
 
-**Config:** `eval_gpt5nano_vision.toml`, `eval_qwen2vl_vision.toml`, etc.
+**Config:** `eval_gpt5nano_vision.toml`, `eval_qwen3vl_vision.toml`, etc.
 
 ### **synth-ai filter** (Quality Filtering)
 - Removes low-quality episodes (too short, errors, loops)
@@ -89,7 +89,7 @@ uv run python examples/qwen_vl/crafter_gpt5nano_agent.py --seeds 5 --steps 10
 - Exports to SFT JSONL format (OpenAI-style messages)
 - Splits into train/val sets
 
-**Config:** `filter_vision_sft.toml`, `filter_qwen2vl_sft.toml`
+**Config:** `filter_vision_sft.toml`, `filter_qwen3vl_sft.toml`
 
 ### **synth-ai train** (Model Training)
 - Trains VLM with LoRA on collected traces
@@ -194,13 +194,13 @@ model = "gpt-4o-mini-2024-07-18" # Stronger teacher
 ### Collect More Episodes
 ```toml
 [eval]
-num_episodes = 500 # Default: 100
+seeds = "0-499" # Default: "0-99"
 ```
 
 ### Change Image Resolution
 ```toml
-[task.config]
-render_size = [128, 128] # Default: [64, 64]
+[eval.env_config]
+env_params = {render_size = [128, 128]} # Default: [64, 64]
 ```
 
 ### Adjust Quality Filters
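The `seeds = "0-499"` string above replaces the old `num_episodes` count. A hypothetical helper showing how an inclusive "start-end" spec expands to concrete seeds (the CLI's real parser may accept richer forms):

```python
def expand_seed_range(spec: str) -> list[int]:
    """Expand an inclusive "start-end" spec such as "0-499"."""
    start, end = (int(part) for part in spec.split("-", 1))
    return list(range(start, end + 1))

assert expand_seed_range("0-99") == list(range(100))
assert len(expand_seed_range("0-499")) == 500
```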
@@ -272,4 +272,3 @@ min_achievements_per_episode = 0
 ---
 
 **Infrastructure ready!** 🎉 Start collecting vision traces and training your VLM! 🚀
-
examples/qwen_vl/VISION_TESTS_COMPLETE.md

@@ -33,7 +33,7 @@ test_vision_inference_multiple_images() # Multiple images per message
 **File:** `tests/integration/cli/test_cli_train_sft_vision.py`
 
 ```python
-test_cli_train_sft_vision_qwen2vl() # Full SFT job submission
+test_cli_train_sft_vision_qwen3vl() # Full SFT job submission
 test_vision_sft_dataset_validation() # Dataset quality checks
 test_cli_train_sft_vision_small_config() # Fast CI test
 ```
@@ -478,7 +478,7 @@ tests/integration/cli/test_cli_inference_vision.py::test_vision_inference_valida
 tests/integration/cli/test_cli_inference_vision.py::test_vision_inference_multiple_images PASSED
 tests/integration/cli/test_cli_train_sft_vision.py::test_vision_sft_dataset_validation PASSED
 tests/integration/cli/test_cli_train_sft_vision.py::test_cli_train_sft_vision_small_config PASSED
-tests/integration/cli/test_cli_train_sft_vision.py::test_cli_train_sft_vision_qwen2vl PASSED
+tests/integration/cli/test_cli_train_sft_vision.py::test_cli_train_sft_vision_qwen3vl PASSED
 tests/integration/cli/test_cli_train_rl_vision.py::test_task_app_vision_support PASSED
 tests/integration/cli/test_cli_train_rl_vision.py::test_cli_train_rl_vision_small_config PASSED
 tests/integration/cli/test_cli_train_rl_vision.py::test_cli_train_rl_vision_qwen3vl4b PASSED
@@ -487,4 +487,3 @@ tests/integration/cli/test_cli_train_rl_vision.py::test_cli_train_rl_vision_qwen
 ```
 
 **Status:** 🎯 Production-ready! Complete vision ML pipeline tested from inference through RL training! 🎉
-
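To re-run only the renamed test from the log above, pytest can be driven from Python as well as from the shell; the test IDs are copied verbatim from the PASSED lines:

```python
import pytest

# Select the renamed qwen3vl SFT test plus the fast CI config test.
pytest.main([
    "tests/integration/cli/test_cli_train_sft_vision.py::test_cli_train_sft_vision_qwen3vl",
    "tests/integration/cli/test_cli_train_sft_vision.py::test_cli_train_sft_vision_small_config",
    "-q",
])
```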
examples/qwen_vl/collect_data_via_cli.md

@@ -5,8 +5,8 @@ Use synth-ai's built-in CLI tools to collect vision traces for SFT training.
 ## 📋 Overview
 
 **Pipeline:**
-1. `synth-ai serve` → Start Crafter task app with vision support
-2. `synth-ai eval` → Run rollouts with gpt-5-nano or Qwen-VL, collect traces
+1. `synth-ai deploy --runtime=uvicorn` → Start the Crafter task app locally
+2. `synth-ai eval` → Run rollouts with GPT-4o Mini or Qwen3-VL and collect traces
 3. `synth-ai filter` → Filter traces by quality, convert to SFT format
 
 ---
@@ -19,9 +19,10 @@ Use synth-ai's built-in CLI tools to collect vision traces for SFT training.
 cd /Users/joshpurtell/Documents/GitHub/synth-ai
 
 # Serve Crafter task app on localhost:8000
-uvx synth-ai serve \
-  --task-app examples/task_apps/crafter/task_app/synth_envs_hosted/main.py \
-  --port 8000
+uvx synth-ai deploy grpo-crafter-task-app \
+  --runtime uvicorn \
+  --port 8000 \
+  --trace traces/v3
 ```
 
 **Output:**
@@ -32,7 +33,7 @@ uvx synth-ai serve \
 
 ### Option B: Use Hosted Task App (Modal)
 
-If you have a deployed Crafter task app on Modal:
+If you already have a deployed Crafter task app on Modal:
 ```bash
 export TASK_APP_URL="https://synth-laboratories--grpo-crafter-task-app.modal.run"
 ```
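Whichever option is used, it helps to confirm the task app is reachable before running rollouts. A minimal standard-library sketch; the `/health` endpoint is the one this file's troubleshooting section probes with curl:

```python
import os
import urllib.request

# TASK_APP_URL as exported above; falls back to the local uvicorn default.
base = os.environ.get("TASK_APP_URL", "http://localhost:8000")
with urllib.request.urlopen(f"{base}/health", timeout=10) as resp:
    print(resp.status, resp.read().decode())
```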
@@ -41,40 +42,36 @@ export TASK_APP_URL="https://synth-laboratories--grpo-crafter-task-app.modal.run
 
 ## 🎯 Step 2: Run Eval with Vision Models
 
-### Collect gpt-5-nano Traces (OpenAI)
+### Collect GPT-4o-mini Vision Traces (OpenAI)
 
 Create eval config: `examples/qwen_vl/configs/eval_gpt5nano_vision.toml`
 
 ```toml
-# Evaluation config for gpt-5-nano with vision
+# Evaluation config for gpt-4o-mini (vision)
+# Legacy filename kept for convenience
 [eval]
-model = "gpt-5-nano"
-provider = "openai" # Use OpenAI API
+app_id = "grpo-crafter-task-app"
 task_app_url = "http://localhost:8000" # or your hosted URL
-
-# Vision settings
-use_vision = true
-image_only_mode = false # Include both text + images
-
-# Rollout settings
-num_episodes = 100
-max_steps_per_episode = 50
-seeds = "0-99" # Seeds 0 through 99
-
-# Sampling
+model = "gpt-4o-mini-2024-07-18"
+seeds = "0-99"
+max_turns = 50
+concurrency = 5
+env_name = "crafter"
+policy_name = "crafter-react"
+trace_format = "structured"
+return_trace = true
+
+[eval.env_config]
+env_params = {max_steps_per_episode = 50}
+
+[eval.policy_config]
+provider = "openai"
+model = "gpt-4o-mini-2024-07-18"
 temperature = 0.7
 max_tokens = 512
-
-# Trace collection
-collect_traces = true
-trace_db = "traces/gpt5nano_vision/rollouts.db"
-
-# Tools
+use_vision = true
+image_only_mode = false
 use_tools = true
-
-[task]
-name = "crafter"
-environment = "crafter-classic"
 ```
 
 **Run evaluation:**
@@ -83,15 +80,15 @@ export OPENAI_API_KEY="sk-..."
 
 uvx synth-ai eval \
   --config examples/qwen_vl/configs/eval_gpt5nano_vision.toml \
-  --output-dir traces/gpt5nano_vision
+  --trace-db traces/gpt4omini_vision/rollouts.db
 ```
 
 **Expected output:**
 ```
-🎮 Running evaluation: gpt-5-nano on crafter
+🎮 Running evaluation: gpt-4o-mini on crafter
 📊 Episodes: 100, Max steps: 50
 🔍 Vision: enabled (auto-detected from model name)
-📦 Collecting traces to: traces/gpt5nano_vision/rollouts.db
+📦 Collecting traces to: traces/gpt4omini_vision/rollouts.db
 
 Episode 0/100 (seed=0): 50 steps, 3 achievements ✓
 Episode 1/100 (seed=1): 48 steps, 2 achievements ✓
@@ -103,45 +100,40 @@ Episode 99/100 (seed=99): 50 steps, 3 achievements ✓
 Total episodes: 100
 Total steps: 4,923
 Avg achievements: 2.8
-Traces saved to: traces/gpt5nano_vision/rollouts.db
+Traces saved to: traces/gpt4omini_vision/rollouts.db
 ```
 
 ---
 
-### Collect Qwen-VL Traces (synth-ai hosted)
+### Collect Qwen3-VL Traces (Synth hosted inference)
 
-Create eval config: `examples/qwen_vl/configs/eval_qwen2vl_vision.toml`
+Create eval config: `examples/qwen_vl/configs/eval_qwen3vl_vision.toml`
 
 ```toml
-# Evaluation config for Qwen2-VL via synth-ai
+# Evaluation config for Qwen3-VL vision rollouts
 [eval]
-model = "Qwen/Qwen2-VL-7B-Instruct"
-provider = "synth" # Use synth-ai hosted inference
+app_id = "grpo-crafter-task-app"
 task_app_url = "http://localhost:8000"
-
-# Vision settings (auto-detected from model name)
-use_vision = true
-image_only_mode = false
-
-# Rollout settings
-num_episodes = 100
-max_steps_per_episode = 50
-seeds = "0-99"
-
-# Sampling
+model = "Qwen/Qwen3-VL-8B-Instruct"
+seeds = "100-199"
+max_turns = 50
+concurrency = 5
+env_name = "crafter"
+policy_name = "crafter-react"
+trace_format = "structured"
+return_trace = true
+
+[eval.env_config]
+env_params = {max_steps_per_episode = 50}
+
+[eval.policy_config]
+provider = "synth"
+model = "Qwen/Qwen3-VL-8B-Instruct"
 temperature = 0.7
 max_tokens = 512
-
-# Trace collection
-collect_traces = true
-trace_db = "traces/qwen2vl_vision/rollouts.db"
-
-# Tools
+use_vision = true
+image_only_mode = false
 use_tools = true
-
-[task]
-name = "crafter"
-environment = "crafter-classic"
 ```
 
 **Run evaluation:**
@@ -149,8 +141,8 @@ environment = "crafter-classic"
 export SYNTH_API_KEY="sk_live_..."
 
 uvx synth-ai eval \
-  --config examples/qwen_vl/configs/eval_qwen2vl_vision.toml \
-  --output-dir traces/qwen2vl_vision
+  --config examples/qwen_vl/configs/eval_qwen3vl_vision.toml \
+  --trace-db traces/qwen3vl_vision/rollouts.db
 ```
 
 ---
@@ -169,8 +161,8 @@ Create `examples/qwen_vl/configs/filter_vision_sft.toml`:
 ```toml
 # Filter vision traces for SFT training
 [filter]
-input_db = "traces/gpt5nano_vision/rollouts.db"
-output_dir = "traces/gpt5nano_vision/sft"
+input_db = "traces/gpt4omini_vision/rollouts.db"
+output_dir = "traces/gpt4omini_vision/sft"
 
 # Quality filters
 min_steps_per_episode = 5
@@ -205,7 +197,7 @@ uvx synth-ai filter \
 
 **Expected output:**
 ```
-📂 Loading traces from traces/gpt5nano_vision/rollouts.db
+📂 Loading traces from traces/gpt4omini_vision/rollouts.db
 Total episodes: 100
 Total steps: 4,923
 
@@ -222,8 +214,8 @@ uvx synth-ai filter \
 ✓ Final dataset: 4,190 samples
 
 ✂️ Splitting train/val (90%/10%)...
-✓ Train: 3,771 samples → traces/gpt5nano_vision/sft/train.jsonl
-✓ Val: 419 samples → traces/gpt5nano_vision/sft/val.jsonl
+✓ Train: 3,771 samples → traces/gpt4omini_vision/sft/train.jsonl
+✓ Val: 419 samples → traces/gpt4omini_vision/sft/val.jsonl
 
 ✅ Filter complete!
 ```
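A quick way to confirm the 90/10 split reported above actually landed on disk (paths taken from the filter output):

```python
# Count samples in each JSONL split and report the validation fraction.
with open("traces/gpt4omini_vision/sft/train.jsonl") as f:
    train = sum(1 for _ in f)
with open("traces/gpt4omini_vision/sft/val.jsonl") as f:
    val = sum(1 for _ in f)
print(train, val, f"val fraction: {val / (train + val):.2f}")
```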
@@ -236,7 +228,7 @@ Check the SFT JSONL format:
 
 ```bash
 # Inspect first sample
-head -1 traces/gpt5nano_vision/sft/train.jsonl | jq .
+head -1 traces/gpt4omini_vision/sft/train.jsonl | jq .
 ```
 
 **Expected format:**
@@ -282,7 +274,7 @@ head -1 traces/gpt5nano_vision/sft/train.jsonl | jq .
     "step": 12,
     "seed": 42,
     "has_image": true,
-    "model": "gpt-5-nano"
+    "model": "gpt-4o-mini-2024-07-18"
   }
 }
 ```
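Beyond eyeballing with jq, a small sketch for checking the first exported sample programmatically; the nesting of the metadata block is assumed from the truncated sample above:

```python
import json

# Read the first SFT sample and surface the fields shown in the sample above.
with open("traces/gpt4omini_vision/sft/train.jsonl") as f:
    first = json.loads(f.readline())

meta = first.get("metadata", {})  # "metadata" key is an assumption
print(meta.get("model"), meta.get("has_image"), meta.get("seed"))
```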
@@ -301,8 +293,8 @@ export BACKEND_BASE_URL="https://synth-backend-dev-docker.onrender.com/api"
 uvx synth-ai train \
   --type sft \
   --config configs/vision_sft/crafter_qwen3vl_8b_gpt5nano.toml \
-  --dataset traces/gpt5nano_vision/sft/train.jsonl \
-  --eval-dataset traces/gpt5nano_vision/sft/val.jsonl \
+  --dataset traces/gpt4omini_vision/sft/train.jsonl \
+  --eval-dataset traces/gpt4omini_vision/sft/val.jsonl \
   --env-file backend/.env.dev
 ```
 
@@ -313,15 +305,16 @@ uvx synth-ai train \
 ```bash
 # Terminal 1: Serve task app
 cd /Users/joshpurtell/Documents/GitHub/synth-ai
-uvx synth-ai serve \
-  --task-app examples/task_apps/crafter/task_app/synth_envs_hosted/main.py \
-  --port 8000
+uvx synth-ai deploy grpo-crafter-task-app \
+  --runtime uvicorn \
+  --port 8000 \
+  --trace traces/v3
 
 # Terminal 2: Collect traces
 export OPENAI_API_KEY="sk-..."
 uvx synth-ai eval \
   --config examples/qwen_vl/configs/eval_gpt5nano_vision.toml \
-  --output-dir traces/gpt5nano_vision
+  --trace-db traces/gpt4omini_vision/rollouts.db
 
 # Terminal 2: Filter and export
 uvx synth-ai filter \
@@ -333,8 +326,8 @@ export BACKEND_BASE_URL="https://synth-backend-dev-docker.onrender.com/api"
 uvx synth-ai train \
   --type sft \
   --config configs/vision_sft/crafter_qwen3vl_8b_gpt5nano.toml \
-  --dataset /Users/joshpurtell/Documents/GitHub/synth-ai/traces/gpt5nano_vision/sft/train.jsonl \
-  --eval-dataset /Users/joshpurtell/Documents/GitHub/synth-ai/traces/gpt5nano_vision/sft/val.jsonl \
+  --dataset /Users/joshpurtell/Documents/GitHub/synth-ai/traces/gpt4omini_vision/sft/train.jsonl \
+  --eval-dataset /Users/joshpurtell/Documents/GitHub/synth-ai/traces/gpt4omini_vision/sft/val.jsonl \
   --env-file backend/.env.dev
 ```
 
@@ -345,7 +338,7 @@ uvx synth-ai train \
 | Step | Duration | Cost | Notes |
 |------|----------|------|-------|
 | 1. Serve | Continuous | Free | Local or Modal |
-| 2. Eval (100 episodes) | 30-60 min | ~$1-2 | OpenAI gpt-5-nano |
+| 2. Eval (100 episodes) | 30-60 min | ~$1-2 | OpenAI gpt-4o-mini |
 | 3. Filter | < 5 min | Free | Local processing |
 | 4. SFT (2 epochs) | 2-4 hrs | ~$21 | 2x H200 on Modal |
 
@@ -364,12 +357,12 @@ uvx synth-ai eval --config configs/eval_gpt5nano_vision.toml
 # Collect from gpt-4o-mini (stronger teacher)
 uvx synth-ai eval --config configs/eval_gpt4o_mini_vision.toml
 
-# Collect from Qwen2-VL (for comparison)
-uvx synth-ai eval --config configs/eval_qwen2vl_vision.toml
+# Collect from Qwen3-VL (for comparison)
+uvx synth-ai eval --config configs/eval_qwen3vl_vision.toml
 
 # Merge and filter all traces
 uvx synth-ai filter \
-  --input-dbs traces/gpt5nano_vision/rollouts.db,traces/gpt4o_mini_vision/rollouts.db \
+  --input-dbs traces/gpt4omini_vision/rollouts.db,traces/qwen3vl_vision/rollouts.db \
   --output-dir traces/merged_vision/sft \
   --config configs/filter_vision_sft.toml
 ```
@@ -402,7 +395,7 @@ curl http://localhost:8000/health
 ```
 
 ### Traces not saving
-Ensure `collect_traces = true` in eval config and `trace_db` path is writable.
+Ensure you pass `--trace-db` (or accept the default) so traces land in a SQLite/Turso database.
 
 ### Filter removes all samples
 Lower quality thresholds:
@@ -420,4 +413,3 @@ min_achievements_per_episode = 0 # Allow episodes with no achievements
 - **Eval Config Schema:** `synth-ai eval --help`
 - **Filter Config Schema:** `synth-ai filter --help`
 - **Full Pipeline:** See `/Users/joshpurtell/Documents/GitHub/monorepo/vision_sft_rl.txt`
-
examples/qwen_vl/collect_vision_traces.py

@@ -22,13 +22,13 @@ Usage:
   --max-steps 50 \
   --output-dir traces/gpt5nano_vision
 
-# Collect with Qwen2-VL via synth
+# Collect with Qwen3-VL via synth
 uv run python examples/qwen_vl/collect_vision_traces.py \
-  --model Qwen/Qwen2-VL-7B-Instruct \
+  --model Qwen/Qwen3-VL-8B-Instruct \
   --provider synth \
   --episodes 100 \
   --max-steps 50 \
-  --output-dir traces/qwen2vl_vision
+  --output-dir traces/qwen3vl_vision
 """
 
 from __future__ import annotations
@@ -333,7 +333,7 @@ async def main() -> None:
     parser.add_argument(
         "--model",
         required=True,
-        help="Model name (e.g., gpt-5-nano, Qwen/Qwen2-VL-7B-Instruct)",
+        help="Model name (e.g., gpt-5-nano, Qwen/Qwen3-VL-8B-Instruct)",
     )
     parser.add_argument(
         "--provider",
examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml

@@ -1,19 +1,9 @@
-# Crafter RL with Vision - Qwen3-VL-4B
-#
-# This configuration runs online RL (GRPO/GSPO) with a vision-language model
-# using the same Crafter task app that generates image observations for SFT data.
-#
-# Model: Qwen/Qwen3-VL-4B (smaller, faster for testing)
-# Task App: grpo-crafter-task-app (Modal deployed, supports vision)
-# Policy: crafter-react with use_vision=true, image_only_mode=true
-
 [algorithm]
 type = "online"
 method = "policy_gradient"
 variety = "gspo"
 
 [services]
-# Replace with the Modal URL printed by `uvx synth-ai modal-serve grpo-crafter`
 task_url = "https://YOUR-MODAL-TASK-APP.modal.run"
 
 [compute]
@@ -30,8 +20,6 @@ tensor_parallel = 1
 [vllm]
 tensor_parallel_size = 1
 max_model_len = 4096
-# Vision-specific settings
-limit_mm_per_prompt = { "image": 1 } # Max 1 image per prompt
 
 [reference]
 placement = "none"
@@ -40,88 +28,83 @@ placement = "none"
 base = "Qwen/Qwen3-VL-4B-Instruct"
 trainer_mode = "lora"
 label = "crafter-rl-vision-qwen3vl4b"
-supports_vision = true # Enable vision support
+supports_vision = true
 
 [lora]
 r = 16
 alpha = 32
 dropout = 0.05
-target_modules = ["all-linear"]
-# Note: will automatically include mm_projector for vision models
+target_modules = [ "all-linear",]
 
 [rollout]
 env_name = "crafter"
-max_turns = 10 # 10 steps per episode for faster testing
+max_turns = 10
 episodes_per_batch = 2
 policy_name = "crafter-react"
-max_concurrent_rollouts = 4 # Lower for vision models (memory)
+max_concurrent_rollouts = 4
 batches_per_step = 2
-ops = ["agent", "env"]
-
-[rollout.env_config]
-difficulty = "easy"
-
-[rollout.env_config.step_rewards]
-enabled = true
-mode = "decision_stepwise"
-strategy = "consistent"
-indicator_lambda = 1.0
-step_beta = 0.0
-
-[rollout.policy_config]
-# Vision-specific policy settings
-use_vision = true # Enable vision input
-image_only_mode = true # Use only images, no text observations
-temperature = 0.6 # Slightly higher for exploration
-top_p = 0.95
-max_tokens = 512
-max_llm_calls = 10
+ops = [ "agent", "env",]
 
 [evaluation]
-instances = 8 # Lower for faster vision evals
+instances = 8
 every_n_iters = 5
-seeds = [0, 1, 2, 3, 4, 5, 6, 7]
+seeds = [ 0, 1, 2, 3, 4, 5, 6, 7,]
 
 [training]
 num_epochs = 1
-iterations_per_epoch = 3 # Shorter for integration test
+iterations_per_epoch = 3
 gradient_accumulation_steps = 2
 max_accumulated_minibatch = 1
 max_turns = 10
-batch_size = 2 # Smaller for vision models
+batch_size = 2
 group_size = 2
 learning_rate = 5e-5
 log_interval = 1
 weight_sync_interval = 1
 event_rewards_kind = "unique"
-async_semaphore_max = 2 # Lower concurrency for vision
-
-# Enable dense decision rewards
+async_semaphore_max = 2
 step_rewards_enabled = true
 step_rewards_mode = "decision_stepwise"
 step_rewards_indicator_lambda = 1.0
 step_rewards_beta = 0.0
 step_rewards_strategy = "consistent"
+max_images_per_message = 1
+supports_vision = true
+
+[tags]
+experiment = "crafter_rl_vision_qwen3vl4b"
+task = "crafter_agent_vision"
+model_size = "4b"
+vision_enabled = true
+image_only = true
 
-# Vision-specific training settings
-max_images_per_message = 1 # Limit images for memory
-supports_vision = true # Enable vision training path
+[vllm.limit_mm_per_prompt]
+image = 1
+
+[rollout.env_config]
+difficulty = "easy"
+
+[rollout.policy_config]
+use_vision = true
+image_only_mode = true
+temperature = 0.6
+top_p = 0.95
+max_tokens = 512
+max_llm_calls = 10
 
 [training.weight_sync]
 enable = true
-targets = ["policy"]
+targets = [ "policy",]
 mode = "direct"
 direct = true
 verify_every_k = 0
 
-[judge]
-type = "env" # Use environment rewards only (simpler for testing)
+[judge.options]
 timeout_s = 30
 
-[tags]
-experiment = "crafter_rl_vision_qwen3vl4b"
-task = "crafter_agent_vision"
-model_size = "4b"
-vision_enabled = true
-image_only = true
-
+[rollout.env_config.step_rewards]
+enabled = true
+mode = "decision_stepwise"
+strategy = "consistent"
+indicator_lambda = 1.0
+step_beta = 0.0
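Since this hunk mostly regenerates the TOML (hand-written comments dropped, tables reordered), a parse check is a cheap way to confirm the new layout still carries the vision settings. The keys below are read off the + side of the hunk; the [model] table name is inferred from context, and tomllib requires Python 3.11+:

```python
import tomllib

# Parse the regenerated config and spot-check vision-related keys.
with open("examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml", "rb") as f:
    cfg = tomllib.load(f)

assert cfg["model"]["supports_vision"] is True  # [model] table name inferred
assert cfg["vllm"]["limit_mm_per_prompt"]["image"] == 1
assert cfg["rollout"]["policy_config"]["image_only_mode"] is True
assert cfg["training"]["max_images_per_message"] == 1
```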
examples/qwen_vl/configs/crafter_vlm_sft_example.toml

@@ -7,7 +7,7 @@ method = "sft"
 variety = "lora"
 
 [job]
-model = "Qwen/Qwen2-VL-7B-Instruct" # or Qwen/Qwen3-VL-8B
+model = "Qwen/Qwen3-VL-8B-Instruct" # or Qwen/Qwen3-VL-4B-Instruct
 # Dataset from collect_vision_traces.py → export_to_sft.py
 data = "traces/gpt5nano_vision/train.jsonl"
 
@@ -57,4 +57,3 @@ task = "crafter"
 modality = "vision"
 data_source = "collected_traces"
 model_family = "qwen_vl"
-