synth-ai 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (236) hide show
  1. examples/README.md +1 -0
  2. examples/multi_step/SFT_README.md +147 -0
  3. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +9 -9
  4. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  5. examples/multi_step/convert_traces_to_sft.py +84 -0
  6. examples/multi_step/run_sft_qwen30b.sh +45 -0
  7. examples/qwen_coder/configs/coder_lora_30b.toml +2 -1
  8. examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
  9. examples/qwen_coder/configs/coder_lora_small.toml +2 -1
  10. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  11. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  12. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  13. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  14. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  15. examples/qwen_vl/QUICKSTART.md +327 -0
  16. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  17. examples/qwen_vl/README.md +154 -0
  18. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  19. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  20. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  21. examples/qwen_vl/SETUP_COMPLETE.md +275 -0
  22. examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
  23. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  24. examples/qwen_vl/__init__.py +2 -0
  25. examples/qwen_vl/collect_data_via_cli.md +423 -0
  26. examples/qwen_vl/collect_vision_traces.py +368 -0
  27. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
  28. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
  29. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
  30. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  31. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
  32. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
  33. examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
  34. examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
  35. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  36. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  37. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  38. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  39. examples/qwen_vl/run_vision_comparison.sh +62 -0
  40. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  41. examples/qwen_vl/test_image_validation.py +201 -0
  42. examples/qwen_vl/test_sft_vision_data.py +110 -0
  43. examples/rl/README.md +1 -1
  44. examples/rl/configs/eval_base_qwen.toml +17 -0
  45. examples/rl/configs/eval_rl_qwen.toml +13 -0
  46. examples/rl/configs/rl_from_base_qwen.toml +37 -0
  47. examples/rl/configs/rl_from_base_qwen17.toml +76 -0
  48. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  49. examples/rl/run_eval.py +436 -0
  50. examples/rl/run_rl_and_save.py +111 -0
  51. examples/rl/task_app/README.md +22 -0
  52. examples/rl/task_app/math_single_step.py +990 -0
  53. examples/rl/task_app/math_task_app.py +111 -0
  54. examples/sft/README.md +5 -5
  55. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
  56. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
  57. examples/sft/evaluate.py +2 -4
  58. examples/sft/export_dataset.py +7 -4
  59. examples/swe/task_app/README.md +1 -1
  60. examples/swe/task_app/grpo_swe_mini.py +0 -1
  61. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  62. examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
  63. examples/swe/task_app/hosted/policy_routes.py +0 -2
  64. examples/swe/task_app/hosted/rollout.py +0 -8
  65. examples/task_apps/crafter/task_app/grpo_crafter.py +4 -7
  66. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +59 -1
  67. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +30 -0
  68. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +62 -31
  69. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +16 -14
  70. examples/task_apps/enron/__init__.py +1 -0
  71. examples/vlm/README.md +3 -3
  72. examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
  73. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  74. examples/vlm/filter_image_rows.py +1 -1
  75. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  76. examples/warming_up_to_rl/_utils.py +92 -0
  77. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  78. examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
  79. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
  80. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  81. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  82. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  83. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  84. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  85. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  86. examples/warming_up_to_rl/readme.md +63 -132
  87. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  88. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  89. examples/warming_up_to_rl/task_app/README.md +42 -0
  90. examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
  91. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  105. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  106. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  107. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  108. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
  109. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  110. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  111. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  112. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  113. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  114. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  115. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  116. synth_ai/__init__.py +44 -30
  117. synth_ai/_utils/__init__.py +47 -0
  118. synth_ai/_utils/base_url.py +10 -0
  119. synth_ai/_utils/http.py +10 -0
  120. synth_ai/_utils/prompts.py +10 -0
  121. synth_ai/_utils/task_app_state.py +12 -0
  122. synth_ai/_utils/user_config.py +10 -0
  123. synth_ai/api/models/supported.py +144 -7
  124. synth_ai/api/train/__init__.py +13 -1
  125. synth_ai/api/train/cli.py +30 -7
  126. synth_ai/api/train/config_finder.py +18 -11
  127. synth_ai/api/train/env_resolver.py +13 -10
  128. synth_ai/cli/__init__.py +62 -78
  129. synth_ai/cli/_modal_wrapper.py +7 -5
  130. synth_ai/cli/_typer_patch.py +0 -2
  131. synth_ai/cli/_validate_task_app.py +22 -4
  132. synth_ai/cli/legacy_root_backup.py +3 -1
  133. synth_ai/cli/lib/__init__.py +10 -0
  134. synth_ai/cli/lib/task_app_discovery.py +7 -0
  135. synth_ai/cli/lib/task_app_env.py +518 -0
  136. synth_ai/cli/recent.py +2 -1
  137. synth_ai/cli/setup.py +266 -0
  138. synth_ai/cli/status.py +1 -1
  139. synth_ai/cli/task_app_deploy.py +16 -0
  140. synth_ai/cli/task_app_list.py +25 -0
  141. synth_ai/cli/task_app_modal_serve.py +16 -0
  142. synth_ai/cli/task_app_serve.py +18 -0
  143. synth_ai/cli/task_apps.py +71 -31
  144. synth_ai/cli/traces.py +1 -1
  145. synth_ai/cli/train.py +18 -0
  146. synth_ai/cli/tui.py +7 -2
  147. synth_ai/cli/turso.py +1 -1
  148. synth_ai/cli/watch.py +1 -1
  149. synth_ai/demos/__init__.py +10 -0
  150. synth_ai/demos/core/__init__.py +28 -1
  151. synth_ai/demos/crafter/__init__.py +1 -0
  152. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  153. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  154. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  155. synth_ai/demos/demo_registry.py +176 -0
  156. synth_ai/demos/math/__init__.py +1 -0
  157. synth_ai/demos/math/_common.py +16 -0
  158. synth_ai/demos/math/app.py +38 -0
  159. synth_ai/demos/math/config.toml +76 -0
  160. synth_ai/demos/math/deploy_modal.py +54 -0
  161. synth_ai/demos/math/modal_task_app.py +702 -0
  162. synth_ai/demos/math/task_app_entry.py +51 -0
  163. synth_ai/environments/environment/core.py +7 -1
  164. synth_ai/environments/examples/bandit/engine.py +0 -1
  165. synth_ai/environments/examples/bandit/environment.py +0 -1
  166. synth_ai/environments/examples/wordle/environment.py +0 -1
  167. synth_ai/evals/base.py +16 -5
  168. synth_ai/evals/client.py +1 -1
  169. synth_ai/inference/client.py +1 -1
  170. synth_ai/judge_schemas.py +8 -8
  171. synth_ai/learning/client.py +1 -1
  172. synth_ai/learning/health.py +1 -1
  173. synth_ai/learning/jobs.py +1 -1
  174. synth_ai/learning/rl/client.py +1 -1
  175. synth_ai/learning/rl/env_keys.py +1 -1
  176. synth_ai/learning/rl/secrets.py +1 -1
  177. synth_ai/learning/sft/client.py +1 -1
  178. synth_ai/learning/sft/data.py +407 -4
  179. synth_ai/learning/validators.py +4 -1
  180. synth_ai/task/apps/__init__.py +4 -2
  181. synth_ai/task/config.py +6 -4
  182. synth_ai/task/rubrics/__init__.py +1 -2
  183. synth_ai/task/rubrics/loaders.py +14 -10
  184. synth_ai/task/rubrics.py +219 -0
  185. synth_ai/task/trace_correlation_helpers.py +24 -11
  186. synth_ai/task/tracing_utils.py +14 -3
  187. synth_ai/task/validators.py +2 -3
  188. synth_ai/tracing_v3/abstractions.py +3 -3
  189. synth_ai/tracing_v3/config.py +15 -13
  190. synth_ai/tracing_v3/constants.py +21 -0
  191. synth_ai/tracing_v3/db_config.py +3 -1
  192. synth_ai/tracing_v3/decorators.py +10 -7
  193. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  194. synth_ai/tracing_v3/session_tracer.py +7 -7
  195. synth_ai/tracing_v3/storage/base.py +29 -29
  196. synth_ai/tracing_v3/storage/config.py +3 -3
  197. synth_ai/tracing_v3/turso/daemon.py +8 -9
  198. synth_ai/tracing_v3/turso/native_manager.py +80 -72
  199. synth_ai/tracing_v3/utils.py +2 -2
  200. synth_ai/tui/cli/query_experiments.py +4 -4
  201. synth_ai/tui/cli/query_experiments_v3.py +4 -4
  202. synth_ai/tui/dashboard.py +14 -9
  203. synth_ai/utils/__init__.py +101 -0
  204. synth_ai/utils/base_url.py +94 -0
  205. synth_ai/utils/cli.py +131 -0
  206. synth_ai/utils/env.py +287 -0
  207. synth_ai/utils/http.py +169 -0
  208. synth_ai/utils/modal.py +308 -0
  209. synth_ai/utils/process.py +212 -0
  210. synth_ai/utils/prompts.py +39 -0
  211. synth_ai/utils/sqld.py +122 -0
  212. synth_ai/utils/task_app_discovery.py +882 -0
  213. synth_ai/utils/task_app_env.py +186 -0
  214. synth_ai/utils/task_app_state.py +318 -0
  215. synth_ai/utils/user_config.py +137 -0
  216. synth_ai/v0/config/__init__.py +1 -5
  217. synth_ai/v0/config/base_url.py +1 -7
  218. synth_ai/v0/tracing/config.py +1 -1
  219. synth_ai/v0/tracing/decorators.py +1 -1
  220. synth_ai/v0/tracing/upload.py +1 -1
  221. synth_ai/v0/tracing_v1/config.py +1 -1
  222. synth_ai/v0/tracing_v1/decorators.py +1 -1
  223. synth_ai/v0/tracing_v1/upload.py +1 -1
  224. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
  225. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/RECORD +229 -117
  226. synth_ai/cli/man.py +0 -106
  227. synth_ai/compound/cais.py +0 -0
  228. synth_ai/core/experiment.py +0 -13
  229. synth_ai/core/system.py +0 -15
  230. synth_ai/demo_registry.py +0 -295
  231. synth_ai/handshake.py +0 -109
  232. synth_ai/http.py +0 -26
  233. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
  234. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
  235. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
  236. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,275 @@
1
+ # ✅ VLM Setup Complete!
2
+
3
+ Complete vision-language model (VLM) infrastructure for Crafter with image observations.
4
+
5
+ ## 📦 What Was Created
6
+
7
+ ### **Core Examples** (Python Scripts)
8
+ 1. **`crafter_gpt5nano_agent.py`** - Demo agent using OpenAI gpt-5-nano
9
+ 2. **`crafter_qwen_vl_agent.py`** - Demo agent using Qwen-VL via synth-ai
10
+ 3. **`collect_vision_traces.py`** - Manual trace collection script
11
+
12
+ ### **CLI-Based Pipeline** (Recommended)
13
+ 4. **`run_vision_sft_pipeline.sh`** - Complete automated pipeline
14
+ 5. **`run_vision_comparison.sh`** - Compare gpt-5-nano vs Qwen-VL
15
+
16
+ ### **Configuration Files**
17
+ 6. **`configs/eval_gpt5nano_vision.toml`** - Eval config for gpt-5-nano
18
+ 7. **`configs/eval_qwen2vl_vision.toml`** - Eval config for Qwen2-VL
19
+ 8. **`configs/eval_gpt4o_mini_vision.toml`** - Eval config for gpt-4o-mini (stronger teacher)
20
+ 9. **`configs/filter_vision_sft.toml`** - Filter config for gpt-5-nano traces
21
+ 10. **`configs/filter_qwen2vl_sft.toml`** - Filter config for Qwen2-VL traces
22
+ 11. **`configs/crafter_vlm_sft_example.toml`** - Example SFT training config
23
+
24
+ ### **Documentation**
25
+ 12. **`README.md`** - Overview and quick start
26
+ 13. **`QUICKSTART.md`** - Complete manual pipeline guide
27
+ 14. **`collect_data_via_cli.md`** - **Detailed CLI guide** ⭐
28
+ 15. **`SETUP_COMPLETE.md`** - This file
29
+
30
+ ---
31
+
32
+ ## 🚀 Quick Start (3 Commands)
33
+
34
+ ### Option 1: Automated Pipeline
35
+ ```bash
36
+ cd /path/to/synth-ai  # root of your local synth-ai checkout
37
+ export OPENAI_API_KEY="sk-..."
38
+ bash examples/qwen_vl/run_vision_sft_pipeline.sh
39
+ ```
40
+
41
+ ### Option 2: Step-by-Step CLI
42
+ ```bash
43
+ # 1. Collect traces (30-60 min)
44
+ uvx synth-ai eval --config examples/qwen_vl/configs/eval_gpt5nano_vision.toml
45
+
46
+ # 2. Filter and export (< 5 min)
47
+ uvx synth-ai filter --config examples/qwen_vl/configs/filter_vision_sft.toml
48
+
49
+ # 3. Train SFT (2-4 hours)
50
+ cd /path/to/monorepo  # repo that holds configs/vision_sft/
51
+ uvx synth-ai train --type sft --config configs/vision_sft/crafter_qwen3vl_8b_gpt5nano.toml
52
+ ```
53
+
54
+ ### Option 3: Quick Demo
55
+ ```bash
56
+ # Test gpt-5-nano (5 episodes, 10 steps each)
57
+ export OPENAI_API_KEY="sk-..."
58
+ uv run python examples/qwen_vl/crafter_gpt5nano_agent.py --seeds 5 --steps 10
59
+ ```
60
+
61
+ ---
62
+
63
+ ## 📖 Documentation Index
64
+
65
+ | File | Purpose |
66
+ |------|---------|
67
+ | **`collect_data_via_cli.md`** ⭐ | **Main guide**: Complete CLI-based pipeline |
68
+ | `README.md` | Overview and quick reference |
69
+ | `QUICKSTART.md` | Manual Python script approach |
70
+ | `SETUP_COMPLETE.md` | This summary (you are here) |
71
+
72
+ **Start here:** 👉 `collect_data_via_cli.md`
73
+
74
+ ---
75
+
76
+ ## 🎯 What Each Tool Does
77
+
78
+ ### **synth-ai eval** (Data Collection)
79
+ - Runs rollouts with vision-enabled models
80
+ - Automatically detects vision capability from model name
81
+ - Stores traces to SQLite with base64-encoded images
82
+ - Supports parallel episodes for faster collection
83
+
84
+ **Config:** `eval_gpt5nano_vision.toml`, `eval_qwen2vl_vision.toml`, etc.
85
+
86
+ ### **synth-ai filter** (Quality Filtering)
87
+ - Removes low-quality episodes (too short, errors, loops)
88
+ - Deduplicates state-action pairs
89
+ - Exports to SFT JSONL format (OpenAI-style messages)
90
+ - Splits into train/val sets
91
+
92
+ **Config:** `filter_vision_sft.toml`, `filter_qwen2vl_sft.toml`
93
+
94
+ ### **synth-ai train** (Model Training)
95
+ - Trains VLM with LoRA on collected traces
96
+ - Supports Qwen-VL models (Qwen2-VL, Qwen3-VL)
97
+ - Uses 2x or 4x H200 GPUs
98
+ - Saves adapters to HF Hub or S3
99
+
100
+ - **Config:** `crafter_vlm_sft_example.toml` (in synth-ai repo)
100
+ - **Training config:** `monorepo/configs/vision_sft/crafter_qwen3vl_8b_gpt5nano.toml`
102
+
103
+ ---
104
+
105
+ ## 🔍 Key Features
106
+
107
+ ### **Automatic Vision Detection**
108
+ CrafterPolicy auto-detects vision from model names:
109
+ ```python
110
+ # These automatically enable vision:
111
+ "gpt-5-nano" # ✅
112
+ "gpt-4o-mini" # ✅
113
+ "Qwen2-VL-7B-Instruct" # ✅
114
+ "Qwen3-VL-8B" # ✅
115
+ ```
116
+
117
+ ### **Multimodal Messages**
118
+ User messages include both text and images:
119
+ ```json
120
+ {
121
+ "role": "user",
122
+ "content": [
123
+ {"type": "text", "text": "Observation: Health: 9/9, Hunger: 9/9..."},
124
+ {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBORw0KGgo..."}}
125
+ ]
126
+ }
127
+ ```
128
+
129
+ ### **64x64 PNG Images**
130
+ Crafter renders 64x64 frames as base64-encoded PNGs:
131
+ - Efficient token usage (~85 tokens per image)
132
+ - High enough resolution for gameplay
133
+ - Standard OpenAI vision format
134
+
135
+ ---
136
+
137
+ ## 💰 Cost & Timeline
138
+
139
+ ### Complete Pipeline (gpt-5-nano → SFT → RL)
140
+
141
+ | Step | Duration | Cost | Hardware |
142
+ |------|----------|------|----------|
143
+ | Data collection (100 episodes) | 30-60 min | ~$1-2 | OpenAI API |
144
+ | Filter & export | < 5 min | Free | Local |
145
+ | SFT training (2 epochs) | 2-4 hrs | ~$21 | 2x H200 |
146
+ | RL fine-tuning (20 iterations) | 6-10 hrs | ~$112 | 4x H200 |
147
+ | Evaluation (100 episodes × 4 models) | 2-3 hrs | ~$5 | 1x H200 |
148
+
149
+ **Total:** ~$140, roughly 11-18 hours
150
+
151
+ ---
152
+
153
+ ## 🎉 Next Steps
154
+
155
+ 1. **Run a quick demo** to verify vision inference works:
156
+ ```bash
157
+ uv run python examples/qwen_vl/crafter_gpt5nano_agent.py --seeds 3 --steps 5
158
+ ```
159
+
160
+ 2. **Collect training data** (100 episodes):
161
+ ```bash
162
+ uvx synth-ai eval --config examples/qwen_vl/configs/eval_gpt5nano_vision.toml
163
+ ```
164
+
165
+ 3. **Filter and export** to SFT format:
166
+ ```bash
167
+ uvx synth-ai filter --config examples/qwen_vl/configs/filter_vision_sft.toml
168
+ ```
169
+
170
+ 4. **Train VLM** with LoRA:
171
+ ```bash
172
+ cd /path/to/monorepo  # repo that holds configs/vision_sft/
173
+ uvx synth-ai train --type sft --config configs/vision_sft/crafter_qwen3vl_8b_gpt5nano.toml
174
+ ```
175
+
176
+ 5. **Fine-tune with RL** (optional):
177
+ ```bash
178
+ uvx synth-ai train --type rl --config configs/vision_rl/crafter_qwen3vl_8b_grpo.toml
179
+ ```
180
+
181
+ 6. **Benchmark** final model vs baselines
182
+
183
+ ---
184
+
185
+ ## 🔧 Customization
186
+
187
+ ### Use a Different Teacher Model
188
+ Edit `configs/eval_gpt5nano_vision.toml`:
189
+ ```toml
190
+ [eval]
191
+ model = "gpt-4o-mini-2024-07-18" # Stronger teacher
192
+ ```
193
+
194
+ ### Collect More Episodes
195
+ ```toml
196
+ [eval]
197
+ num_episodes = 500 # Default: 100
198
+ ```
199
+
200
+ ### Change Image Resolution
201
+ ```toml
202
+ [task.config]
203
+ render_size = [128, 128] # Default: [64, 64]
204
+ ```
205
+
206
+ ### Adjust Quality Filters
207
+ Edit `configs/filter_vision_sft.toml`:
208
+ ```toml
209
+ [filter]
210
+ min_steps_per_episode = 10 # Stricter (default: 5)
211
+ min_achievements_per_episode = 2 # Require achievements (default: 0)
212
+ ```
213
+
214
+ ---
215
+
216
+ ## 📊 Expected Results
217
+
218
+ ### Data Collection Quality
219
+ - **gpt-5-nano:** ~20-30% achievement rate
220
+ - **gpt-4o-mini:** ~35-45% achievement rate (better teacher)
221
+ - **Qwen2-VL-7B (base):** ~5-10% achievement rate
222
+
223
+ ### SFT Performance (After Training)
224
+ - **Base Qwen-VL:** ~5-10% → **SFT:** ~20-30%
225
+ - **Improvement:** roughly +15-20 percentage points in achievement rate from distillation
226
+
227
+ ### RL Performance (After 20 Iterations)
228
+ - **SFT:** ~20-30% → **SFT+RL:** ~40-50%
229
+ - **Improvement:** roughly +20 percentage points in achievement rate from RL fine-tuning
230
+
231
+ ---
232
+
233
+ ## 🐛 Troubleshooting
234
+
235
+ ### Vision not detected
236
+ ```toml
237
+ # Add explicitly in eval config:
238
+ use_vision = true
239
+ ```
240
+
241
+ ### API key errors
242
+ ```bash
243
+ # OpenAI
244
+ export OPENAI_API_KEY="sk-..."
245
+
246
+ # synth-ai
247
+ export SYNTH_API_KEY="sk_live_..."
248
+ ```
249
+
250
+ ### Task app connection failed
251
+ ```bash
252
+ # Check task app is running
253
+ curl https://synth-laboratories--grpo-crafter-task-app.modal.run/health
254
+ ```
255
+
256
+ ### Filter removes all samples
257
+ ```toml
258
+ # Lower quality thresholds in filter config
259
+ min_steps_per_episode = 3
260
+ min_achievements_per_episode = 0
261
+ ```
262
+
263
+ ---
264
+
265
+ ## 📚 Related Resources
266
+
267
+ - **Main plan:** `vision_sft_rl.txt` at the root of the monorepo (Phase 9)
268
+ - **Crafter environment:** `examples/task_apps/crafter/README.md`
269
+ - **OpenAI VLM examples:** `examples/vlm/`
270
+ - **synth-ai CLI docs:** Run `uvx synth-ai --help`
271
+
272
+ ---
273
+
274
+ **Infrastructure ready!** 🎉 Start collecting vision traces and training your VLM! 🚀
275
+