synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl
This diff shows the content of publicly available package versions as published to their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of synth-ai has been flagged as potentially problematic.
- examples/analyze_semantic_words.sh +2 -2
- examples/baseline/banking77_baseline.py +204 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
- examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -3
- examples/qwen_vl/README.md +10 -12
- examples/qwen_vl/SETUP_COMPLETE.md +7 -8
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
- examples/qwen_vl/collect_data_via_cli.md +76 -84
- examples/qwen_vl/collect_vision_traces.py +4 -4
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
- examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
- examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
- examples/qwen_vl/run_vision_comparison.sh +6 -7
- examples/rl/README.md +5 -5
- examples/rl/configs/rl_from_base_qwen.toml +26 -1
- examples/rl/configs/rl_from_base_qwen17.toml +6 -2
- examples/rl/task_app/README.md +1 -2
- examples/rl/task_app/math_single_step.py +2 -2
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +1 -1
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
- examples/swe/task_app/README.md +32 -2
- examples/swe/task_app/grpo_swe_mini.py +4 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
- examples/swe/task_app/hosted/inference/openai_client.py +4 -38
- examples/swe/task_app/hosted/policy_routes.py +17 -0
- examples/swe/task_app/hosted/rollout.py +4 -2
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +841 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +288 -39
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +1 -1
- examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
- synth_ai/api/train/builders.py +99 -4
- synth_ai/api/train/cli.py +516 -26
- synth_ai/api/train/config_finder.py +13 -2
- synth_ai/api/train/configs/__init__.py +23 -2
- synth_ai/api/train/configs/prompt_learning.py +442 -0
- synth_ai/api/train/configs/rl.py +61 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/api/train/task_app.py +1 -1
- synth_ai/api/train/validators.py +277 -0
- synth_ai/auth/credentials.py +119 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cli/__init__.py +94 -18
- synth_ai/cli/__main__.py +0 -0
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +84 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1436 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +30 -158
- synth_ai/cli/deploy/__init__.py +43 -0
- synth_ai/cli/deploy.py +162 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +14 -8
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +107 -0
- synth_ai/cli/root.py +9 -5
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +20 -265
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +1 -10
- synth_ai/cli/task_app_modal_serve.py +4 -9
- synth_ai/cli/task_app_serve.py +4 -11
- synth_ai/cli/task_apps.py +51 -1480
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +1 -14
- synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/http.py +12 -0
- synth_ai/judge_schemas.py +10 -10
- synth_ai/learning/__init__.py +10 -0
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +184 -0
- synth_ai/learning/rl/client.py +3 -1
- synth_ai/pricing/__init__.py +2 -0
- synth_ai/pricing/model_pricing.py +57 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +518 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/apps/__init__.py +1 -0
- synth_ai/task/config.py +2 -0
- synth_ai/task/tracing_utils.py +25 -25
- synth_ai/task/validators.py +45 -9
- synth_ai/task_app_cfgs.py +21 -0
- synth_ai/tracing_v3/config.py +162 -19
- synth_ai/tracing_v3/constants.py +1 -1
- synth_ai/tracing_v3/db_config.py +24 -38
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/storage/config.py +47 -13
- synth_ai/tracing_v3/storage/factory.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +113 -11
- synth_ai/tracing_v3/turso/native_manager.py +92 -16
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +30 -1
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/cli.py +149 -5
- synth_ai/utils/env.py +40 -33
- synth_ai/utils/http.py +4 -1
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/modal.py +285 -3
- synth_ai/utils/paths.py +48 -0
- synth_ai/utils/uvicorn.py +113 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
- synth_ai/cli/tui.py +0 -62
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -911
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
examples/qwen_vl/README.md CHANGED

````diff
@@ -55,21 +55,21 @@ uvx synth-ai train --type sft --config configs/vision_sft/crafter_qwen3vl_8b_gpt
 Run Crafter agent using Qwen-VL models via synth-ai's hosted inference.
 
 **Models supported:**
-- `Qwen/
-- `Qwen/
-- `Qwen/Qwen3-VL-8B` (or any
+- `Qwen/Qwen3-VL-2B-Instruct`
+- `Qwen/Qwen3-VL-4B-Instruct`
+- `Qwen/Qwen3-VL-8B-Instruct` (or any Qwen3 VL variant)
 
 **Usage:**
 ```bash
-# Run with
+# Run with Qwen3-VL-4B
 uv run python examples/qwen_vl/crafter_qwen_vl_agent.py \
-  --model Qwen/
+  --model Qwen/Qwen3-VL-4B-Instruct \
   --seeds 10 \
   --steps 20
 
 # Run with Qwen3-VL-8B
 uv run python examples/qwen_vl/crafter_qwen_vl_agent.py \
-  --model Qwen/Qwen3-VL-8B \
+  --model Qwen/Qwen3-VL-8B-Instruct \
   --seeds 10 \
   --steps 20
 ```
@@ -113,13 +113,13 @@ uv run python examples/qwen_vl/collect_vision_traces.py \
   --max-steps 50 \
   --output-dir traces/gpt5nano_vision
 
-# Collect traces with
+# Collect traces with Qwen3-VL via synth
 uv run python examples/qwen_vl/collect_vision_traces.py \
-  --model Qwen/
+  --model Qwen/Qwen3-VL-8B-Instruct \
   --provider synth \
   --episodes 100 \
   --max-steps 50 \
-  --output-dir traces/
+  --output-dir traces/qwen3vl_vision
 ```
 
 **Output:** SQLite database with multimodal traces ready for SFT export.
@@ -132,8 +132,7 @@ CrafterPolicy automatically detects vision capability from model names:
 - ✅ `gpt-5*` → Vision enabled
 - ✅ `gpt-4o*` → Vision enabled
 - ✅ `*qwen-vl*` → Vision enabled
-- ✅ `*
-- ✅ `qwen3-vl*` → Vision enabled
+- ✅ `*qwen3-vl*` → Vision enabled
 
 Or set explicitly: `policy.use_vision = True`
 
@@ -151,4 +150,3 @@ Crafter environment provides observations as:
 3. Export to SFT JSONL format (see `vision_sft_rl.txt`)
 4. Train VLM with LoRA (see monorepo SFT configs)
 5. Fine-tune with RL/GRPO
-
````
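The README hunk above says CrafterPolicy auto-detects vision capability from model-name patterns (`gpt-5*`, `gpt-4o*`, `*qwen-vl*`, `*qwen3-vl*`). A minimal sketch of that kind of matching, assuming simple case-insensitive pattern checks; the helper and pattern list below are illustrative, not the library's actual implementation:

```python
import re

# Illustrative patterns mirroring the README's list; CrafterPolicy's real
# matching logic may differ.
VISION_MODEL_PATTERNS = [
    r"^gpt-5",    # gpt-5*     -> vision enabled
    r"^gpt-4o",   # gpt-4o*    -> vision enabled
    r"qwen-vl",   # *qwen-vl*  -> vision enabled
    r"qwen3-vl",  # *qwen3-vl* -> vision enabled
]

def model_supports_vision(model_name: str) -> bool:
    """Case-insensitive check of the model name against known vision families."""
    name = model_name.lower()
    return any(re.search(pattern, name) for pattern in VISION_MODEL_PATTERNS)

assert model_supports_vision("Qwen/Qwen3-VL-8B-Instruct")
assert not model_supports_vision("Qwen/Qwen3-4B-Instruct")
```

Per the README, `policy.use_vision = True` overrides the auto-detection either way.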
examples/qwen_vl/SETUP_COMPLETE.md CHANGED

````diff
@@ -15,10 +15,10 @@ Complete vision-language model (VLM) infrastructure for Crafter with image obser
 
 ### **Configuration Files**
 6. **`configs/eval_gpt5nano_vision.toml`** - Eval config for gpt-5-nano
-7. **`configs/
+7. **`configs/eval_qwen3vl_vision.toml`** - Eval config for Qwen3-VL
 8. **`configs/eval_gpt4o_mini_vision.toml`** - Eval config for gpt-4o-mini (stronger teacher)
 9. **`configs/filter_vision_sft.toml`** - Filter config for gpt-5-nano traces
-10. **`configs/
+10. **`configs/filter_qwen3vl_sft.toml`** - Filter config for Qwen3-VL traces
 11. **`configs/crafter_vlm_sft_example.toml`** - Example SFT training config
 
 ### **Documentation**
@@ -81,7 +81,7 @@ uv run python examples/qwen_vl/crafter_gpt5nano_agent.py --seeds 5 --steps 10
 - Stores traces to SQLite with base64-encoded images
 - Supports parallel episodes for faster collection
 
-**Config:** `eval_gpt5nano_vision.toml`, `
+**Config:** `eval_gpt5nano_vision.toml`, `eval_qwen3vl_vision.toml`, etc.
 
 ### **synth-ai filter** (Quality Filtering)
 - Removes low-quality episodes (too short, errors, loops)
@@ -89,7 +89,7 @@ uv run python examples/qwen_vl/crafter_gpt5nano_agent.py --seeds 5 --steps 10
 - Exports to SFT JSONL format (OpenAI-style messages)
 - Splits into train/val sets
 
-**Config:** `filter_vision_sft.toml`, `
+**Config:** `filter_vision_sft.toml`, `filter_qwen3vl_sft.toml`
 
 ### **synth-ai train** (Model Training)
 - Trains VLM with LoRA on collected traces
@@ -194,13 +194,13 @@ model = "gpt-4o-mini-2024-07-18" # Stronger teacher
 ### Collect More Episodes
 ```toml
 [eval]
-
+seeds = "0-499"  # Default: "0-99"
 ```
 
 ### Change Image Resolution
 ```toml
-[
-render_size = [128, 128]  # Default: [64, 64]
+[eval.env_config]
+env_params = {render_size = [128, 128]}  # Default: [64, 64]
 ```
 
 ### Adjust Quality Filters
@@ -272,4 +272,3 @@ min_achievements_per_episode = 0
 ---
 
 **Infrastructure ready!** 🎉 Start collecting vision traces and training your VLM! 🚀
-
````
examples/qwen_vl/VISION_TESTS_COMPLETE.md CHANGED

````diff
@@ -33,7 +33,7 @@ test_vision_inference_multiple_images() # Multiple images per message
 **File:** `tests/integration/cli/test_cli_train_sft_vision.py`
 
 ```python
-
+test_cli_train_sft_vision_qwen3vl()       # Full SFT job submission
 test_vision_sft_dataset_validation()      # Dataset quality checks
 test_cli_train_sft_vision_small_config()  # Fast CI test
 ```
@@ -478,7 +478,7 @@ tests/integration/cli/test_cli_inference_vision.py::test_vision_inference_valida
 tests/integration/cli/test_cli_inference_vision.py::test_vision_inference_multiple_images PASSED
 tests/integration/cli/test_cli_train_sft_vision.py::test_vision_sft_dataset_validation PASSED
 tests/integration/cli/test_cli_train_sft_vision.py::test_cli_train_sft_vision_small_config PASSED
-tests/integration/cli/test_cli_train_sft_vision.py::
+tests/integration/cli/test_cli_train_sft_vision.py::test_cli_train_sft_vision_qwen3vl PASSED
 tests/integration/cli/test_cli_train_rl_vision.py::test_task_app_vision_support PASSED
 tests/integration/cli/test_cli_train_rl_vision.py::test_cli_train_rl_vision_small_config PASSED
 tests/integration/cli/test_cli_train_rl_vision.py::test_cli_train_rl_vision_qwen3vl4b PASSED
@@ -487,4 +487,3 @@ tests/integration/cli/test_cli_train_rl_vision.py::test_cli_train_rl_vision_qwen
 ```
 
 **Status:** 🎯 Production-ready! Complete vision ML pipeline tested from inference through RL training! 🎉
-
````
examples/qwen_vl/collect_data_via_cli.md CHANGED

````diff
@@ -5,8 +5,8 @@ Use synth-ai's built-in CLI tools to collect vision traces for SFT training.
 ## 📋 Overview
 
 **Pipeline:**
-1. `synth-ai
-2. `synth-ai eval` → Run rollouts with
+1. `synth-ai deploy --runtime=uvicorn` → Start the Crafter task app locally
+2. `synth-ai eval` → Run rollouts with GPT-4o Mini or Qwen3-VL and collect traces
 3. `synth-ai filter` → Filter traces by quality, convert to SFT format
 
 ---
@@ -19,9 +19,10 @@ Use synth-ai's built-in CLI tools to collect vision traces for SFT training.
 cd /Users/joshpurtell/Documents/GitHub/synth-ai
 
 # Serve Crafter task app on localhost:8000
-uvx synth-ai
-  --
-  --port 8000
+uvx synth-ai deploy grpo-crafter-task-app \
+  --runtime uvicorn \
+  --port 8000 \
+  --trace traces/v3
 ```
 
 **Output:**
@@ -32,7 +33,7 @@ uvx synth-ai serve \
 
 ### Option B: Use Hosted Task App (Modal)
 
-If you have a deployed Crafter task app on Modal:
+If you already have a deployed Crafter task app on Modal:
 ```bash
 export TASK_APP_URL="https://synth-laboratories--grpo-crafter-task-app.modal.run"
 ```
@@ -41,40 +42,36 @@ export TASK_APP_URL="https://synth-laboratories--grpo-crafter-task-app.modal.run
 
 ## 🎯 Step 2: Run Eval with Vision Models
 
-### Collect
+### Collect GPT-4o-mini Vision Traces (OpenAI)
 
 Create eval config: `examples/qwen_vl/configs/eval_gpt5nano_vision.toml`
 
 ```toml
-# Evaluation config for gpt-
+# Evaluation config for gpt-4o-mini (vision)
+# Legacy filename kept for convenience
 [eval]
-
-provider = "openai"  # Use OpenAI API
+app_id = "grpo-crafter-task-app"
 task_app_url = "http://localhost:8000"  # or your hosted URL
-
-
-
-
-
-
-
-
-
-
-
+model = "gpt-4o-mini-2024-07-18"
+seeds = "0-99"
+max_turns = 50
+concurrency = 5
+env_name = "crafter"
+policy_name = "crafter-react"
+trace_format = "structured"
+return_trace = true
+
+[eval.env_config]
+env_params = {max_steps_per_episode = 50}
+
+[eval.policy_config]
+provider = "openai"
+model = "gpt-4o-mini-2024-07-18"
 temperature = 0.7
 max_tokens = 512
-
-
-collect_traces = true
-trace_db = "traces/gpt5nano_vision/rollouts.db"
-
-# Tools
+use_vision = true
+image_only_mode = false
 use_tools = true
-
-[task]
-name = "crafter"
-environment = "crafter-classic"
 ```
 
 **Run evaluation:**
@@ -83,15 +80,15 @@ export OPENAI_API_KEY="sk-..."
 
 uvx synth-ai eval \
   --config examples/qwen_vl/configs/eval_gpt5nano_vision.toml \
-  --
+  --trace-db traces/gpt4omini_vision/rollouts.db
 ```
 
 **Expected output:**
 ```
-🎮 Running evaluation: gpt-
+🎮 Running evaluation: gpt-4o-mini on crafter
 📊 Episodes: 100, Max steps: 50
 🔍 Vision: enabled (auto-detected from model name)
-📦 Collecting traces to: traces/
+📦 Collecting traces to: traces/gpt4omini_vision/rollouts.db
 
 Episode 0/100 (seed=0): 50 steps, 3 achievements ✓
 Episode 1/100 (seed=1): 48 steps, 2 achievements ✓
@@ -103,45 +100,40 @@ Episode 99/100 (seed=99): 50 steps, 3 achievements ✓
 Total episodes: 100
 Total steps: 4,923
 Avg achievements: 2.8
-Traces saved to: traces/
+Traces saved to: traces/gpt4omini_vision/rollouts.db
 ```
 
 ---
 
-### Collect
+### Collect Qwen3-VL Traces (Synth hosted inference)
 
-Create eval config: `examples/qwen_vl/configs/
+Create eval config: `examples/qwen_vl/configs/eval_qwen3vl_vision.toml`
 
 ```toml
-# Evaluation config for
+# Evaluation config for Qwen3-VL vision rollouts
 [eval]
-
-provider = "synth"  # Use synth-ai hosted inference
+app_id = "grpo-crafter-task-app"
 task_app_url = "http://localhost:8000"
-
-
-
-
-
-
-
-
-
-
-
+model = "Qwen/Qwen3-VL-8B-Instruct"
+seeds = "100-199"
+max_turns = 50
+concurrency = 5
+env_name = "crafter"
+policy_name = "crafter-react"
+trace_format = "structured"
+return_trace = true
+
+[eval.env_config]
+env_params = {max_steps_per_episode = 50}
+
+[eval.policy_config]
+provider = "synth"
+model = "Qwen/Qwen3-VL-8B-Instruct"
 temperature = 0.7
 max_tokens = 512
-
-
-collect_traces = true
-trace_db = "traces/qwen2vl_vision/rollouts.db"
-
-# Tools
+use_vision = true
+image_only_mode = false
 use_tools = true
-
-[task]
-name = "crafter"
-environment = "crafter-classic"
 ```
 
 **Run evaluation:**
@@ -149,8 +141,8 @@ environment = "crafter-classic"
 export SYNTH_API_KEY="sk_live_..."
 
 uvx synth-ai eval \
-  --config examples/qwen_vl/configs/
-  --
+  --config examples/qwen_vl/configs/eval_qwen3vl_vision.toml \
+  --trace-db traces/qwen3vl_vision/rollouts.db
 ```
 
 ---
@@ -169,8 +161,8 @@ Create `examples/qwen_vl/configs/filter_vision_sft.toml`:
 ```toml
 # Filter vision traces for SFT training
 [filter]
-input_db = "traces/
-output_dir = "traces/
+input_db = "traces/gpt4omini_vision/rollouts.db"
+output_dir = "traces/gpt4omini_vision/sft"
 
 # Quality filters
 min_steps_per_episode = 5
@@ -205,7 +197,7 @@ uvx synth-ai filter \
 
 **Expected output:**
 ```
-📂 Loading traces from traces/
+📂 Loading traces from traces/gpt4omini_vision/rollouts.db
 Total episodes: 100
 Total steps: 4,923
 
@@ -222,8 +214,8 @@ uvx synth-ai filter \
 ✓ Final dataset: 4,190 samples
 
 ✂️ Splitting train/val (90%/10%)...
-✓ Train: 3,771 samples → traces/
-✓ Val: 419 samples → traces/
+✓ Train: 3,771 samples → traces/gpt4omini_vision/sft/train.jsonl
+✓ Val: 419 samples → traces/gpt4omini_vision/sft/val.jsonl
 
 ✅ Filter complete!
 ```
@@ -236,7 +228,7 @@ Check the SFT JSONL format:
 
 ```bash
 # Inspect first sample
-head -1 traces/
+head -1 traces/gpt4omini_vision/sft/train.jsonl | jq .
 ```
 
 **Expected format:**
@@ -282,7 +274,7 @@ head -1 traces/gpt5nano_vision/sft/train.jsonl | jq .
 "step": 12,
 "seed": 42,
 "has_image": true,
-"model": "gpt-
+"model": "gpt-4o-mini-2024-07-18"
 }
 }
 ```
@@ -301,8 +293,8 @@ export BACKEND_BASE_URL="https://synth-backend-dev-docker.onrender.com/api"
 uvx synth-ai train \
   --type sft \
   --config configs/vision_sft/crafter_qwen3vl_8b_gpt5nano.toml \
-  --dataset traces/
-  --eval-dataset traces/
+  --dataset traces/gpt4omini_vision/sft/train.jsonl \
+  --eval-dataset traces/gpt4omini_vision/sft/val.jsonl \
   --env-file backend/.env.dev
 ```
 
@@ -313,15 +305,16 @@ uvx synth-ai train \
 ```bash
 # Terminal 1: Serve task app
 cd /Users/joshpurtell/Documents/GitHub/synth-ai
-uvx synth-ai
-  --
-  --port 8000
+uvx synth-ai deploy grpo-crafter-task-app \
+  --runtime uvicorn \
+  --port 8000 \
+  --trace traces/v3
 
 # Terminal 2: Collect traces
 export OPENAI_API_KEY="sk-..."
 uvx synth-ai eval \
   --config examples/qwen_vl/configs/eval_gpt5nano_vision.toml \
-  --
+  --trace-db traces/gpt4omini_vision/rollouts.db
 
 # Terminal 2: Filter and export
 uvx synth-ai filter \
@@ -333,8 +326,8 @@ export BACKEND_BASE_URL="https://synth-backend-dev-docker.onrender.com/api"
 uvx synth-ai train \
   --type sft \
   --config configs/vision_sft/crafter_qwen3vl_8b_gpt5nano.toml \
-  --dataset /Users/joshpurtell/Documents/GitHub/synth-ai/traces/
-  --eval-dataset /Users/joshpurtell/Documents/GitHub/synth-ai/traces/
+  --dataset /Users/joshpurtell/Documents/GitHub/synth-ai/traces/gpt4omini_vision/sft/train.jsonl \
+  --eval-dataset /Users/joshpurtell/Documents/GitHub/synth-ai/traces/gpt4omini_vision/sft/val.jsonl \
   --env-file backend/.env.dev
 ```
 
@@ -345,7 +338,7 @@ uvx synth-ai train \
 | Step | Duration | Cost | Notes |
 |------|----------|------|-------|
 | 1. Serve | Continuous | Free | Local or Modal |
-| 2. Eval (100 episodes) | 30-60 min | ~$1-2 | OpenAI gpt-
+| 2. Eval (100 episodes) | 30-60 min | ~$1-2 | OpenAI gpt-4o-mini |
 | 3. Filter | < 5 min | Free | Local processing |
 | 4. SFT (2 epochs) | 2-4 hrs | ~$21 | 2x H200 on Modal |
 
@@ -364,12 +357,12 @@ uvx synth-ai eval --config configs/eval_gpt5nano_vision.toml
 # Collect from gpt-4o-mini (stronger teacher)
 uvx synth-ai eval --config configs/eval_gpt4o_mini_vision.toml
 
-# Collect from
-uvx synth-ai eval --config configs/
+# Collect from Qwen3-VL (for comparison)
+uvx synth-ai eval --config configs/eval_qwen3vl_vision.toml
 
 # Merge and filter all traces
 uvx synth-ai filter \
-  --input-dbs traces/
+  --input-dbs traces/gpt4omini_vision/rollouts.db,traces/qwen3vl_vision/rollouts.db \
   --output-dir traces/merged_vision/sft \
   --config configs/filter_vision_sft.toml
 ```
@@ -402,7 +395,7 @@ curl http://localhost:8000/health
 ```
 
 ### Traces not saving
-Ensure
+Ensure you pass `--trace-db` (or accept the default) so traces land in a SQLite/Turso database.
 
 ### Filter removes all samples
 Lower quality thresholds:
@@ -420,4 +413,3 @@ min_achievements_per_episode = 0 # Allow episodes with no achievements
 - **Eval Config Schema:** `synth-ai eval --help`
 - **Filter Config Schema:** `synth-ai filter --help`
 - **Full Pipeline:** See `/Users/joshpurtell/Documents/GitHub/monorepo/vision_sft_rl.txt`
-
````
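The rewritten CLI guide above consolidates eval settings under a top-level `[eval]` table with nested `[eval.env_config]` and `[eval.policy_config]` tables. As a sketch of how that nesting parses with the standard library (field names follow the example config in the diff, not a schema guaranteed by synth-ai):

```python
import tomllib  # stdlib TOML parser, Python 3.11+
from pathlib import Path

config = tomllib.loads(
    Path("examples/qwen_vl/configs/eval_qwen3vl_vision.toml").read_text()
)

eval_cfg = config["eval"]
# Top-level eval settings
print(eval_cfg["model"], eval_cfg["seeds"])     # "Qwen/Qwen3-VL-8B-Instruct" "100-199"
# [eval.env_config] and [eval.policy_config] arrive as nested dicts
print(eval_cfg["env_config"]["env_params"])     # {'max_steps_per_episode': 50}
print(eval_cfg["policy_config"]["use_vision"])  # True
```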
examples/qwen_vl/collect_vision_traces.py CHANGED

````diff
@@ -22,13 +22,13 @@ Usage:
   --max-steps 50 \
   --output-dir traces/gpt5nano_vision
 
-# Collect with
+# Collect with Qwen3-VL via synth
 uv run python examples/qwen_vl/collect_vision_traces.py \
-  --model Qwen/
+  --model Qwen/Qwen3-VL-8B-Instruct \
   --provider synth \
   --episodes 100 \
   --max-steps 50 \
-  --output-dir traces/
+  --output-dir traces/qwen3vl_vision
 """
 
 from __future__ import annotations
@@ -333,7 +333,7 @@ async def main() -> None:
     parser.add_argument(
         "--model",
         required=True,
-        help="Model name (e.g., gpt-5-nano, Qwen/
+        help="Model name (e.g., gpt-5-nano, Qwen/Qwen3-VL-8B-Instruct)",
     )
     parser.add_argument(
         "--provider",
````
examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml CHANGED

````diff
@@ -1,19 +1,9 @@
-# Crafter RL with Vision - Qwen3-VL-4B
-#
-# This configuration runs online RL (GRPO/GSPO) with a vision-language model
-# using the same Crafter task app that generates image observations for SFT data.
-#
-# Model: Qwen/Qwen3-VL-4B (smaller, faster for testing)
-# Task App: grpo-crafter-task-app (Modal deployed, supports vision)
-# Policy: crafter-react with use_vision=true, image_only_mode=true
-
 [algorithm]
 type = "online"
 method = "policy_gradient"
 variety = "gspo"
 
 [services]
-# Replace with the Modal URL printed by `uvx synth-ai modal-serve grpo-crafter`
 task_url = "https://YOUR-MODAL-TASK-APP.modal.run"
 
 [compute]
@@ -30,8 +20,6 @@ tensor_parallel = 1
 [vllm]
 tensor_parallel_size = 1
 max_model_len = 4096
-# Vision-specific settings
-limit_mm_per_prompt = { "image": 1 }  # Max 1 image per prompt
 
 [reference]
 placement = "none"
@@ -40,88 +28,83 @@ placement = "none"
 base = "Qwen/Qwen3-VL-4B-Instruct"
 trainer_mode = "lora"
 label = "crafter-rl-vision-qwen3vl4b"
-supports_vision = true
+supports_vision = true
 
 [lora]
 r = 16
 alpha = 32
 dropout = 0.05
-target_modules = ["all-linear"]
-# Note: will automatically include mm_projector for vision models
+target_modules = [ "all-linear",]
 
 [rollout]
 env_name = "crafter"
-max_turns = 10
+max_turns = 10
 episodes_per_batch = 2
 policy_name = "crafter-react"
-max_concurrent_rollouts = 4
+max_concurrent_rollouts = 4
 batches_per_step = 2
-ops = ["agent", "env"]
-
-[rollout.env_config]
-difficulty = "easy"
-
-[rollout.env_config.step_rewards]
-enabled = true
-mode = "decision_stepwise"
-strategy = "consistent"
-indicator_lambda = 1.0
-step_beta = 0.0
-
-[rollout.policy_config]
-# Vision-specific policy settings
-use_vision = true  # Enable vision input
-image_only_mode = true  # Use only images, no text observations
-temperature = 0.6  # Slightly higher for exploration
-top_p = 0.95
-max_tokens = 512
-max_llm_calls = 10
+ops = [ "agent", "env",]
 
 [evaluation]
-instances = 8
+instances = 8
 every_n_iters = 5
-seeds = [0, 1, 2, 3, 4, 5, 6, 7]
+seeds = [ 0, 1, 2, 3, 4, 5, 6, 7,]
 
 [training]
 num_epochs = 1
-iterations_per_epoch = 3
+iterations_per_epoch = 3
 gradient_accumulation_steps = 2
 max_accumulated_minibatch = 1
 max_turns = 10
-batch_size = 2
+batch_size = 2
 group_size = 2
 learning_rate = 5e-5
 log_interval = 1
 weight_sync_interval = 1
 event_rewards_kind = "unique"
-async_semaphore_max = 2
-
-# Enable dense decision rewards
+async_semaphore_max = 2
 step_rewards_enabled = true
 step_rewards_mode = "decision_stepwise"
 step_rewards_indicator_lambda = 1.0
 step_rewards_beta = 0.0
 step_rewards_strategy = "consistent"
+max_images_per_message = 1
+supports_vision = true
+
+[tags]
+experiment = "crafter_rl_vision_qwen3vl4b"
+task = "crafter_agent_vision"
+model_size = "4b"
+vision_enabled = true
+image_only = true
 
-
-
-
+[vllm.limit_mm_per_prompt]
+image = 1
+
+[rollout.env_config]
+difficulty = "easy"
+
+[rollout.policy_config]
+use_vision = true
+image_only_mode = true
+temperature = 0.6
+top_p = 0.95
+max_tokens = 512
+max_llm_calls = 10
 
 [training.weight_sync]
 enable = true
-targets = ["policy"]
+targets = [ "policy",]
 mode = "direct"
 direct = true
 verify_every_k = 0
 
-[judge]
-type = "env"  # Use environment rewards only (simpler for testing)
+[judge.options]
 timeout_s = 30
 
-[
-
-
-
-
-
+[rollout.env_config.step_rewards]
+enabled = true
+mode = "decision_stepwise"
+strategy = "consistent"
+indicator_lambda = 1.0
+step_beta = 0.0
````
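Most of the config diff above is a mechanical re-serialization (inline tables expanded to table headers, arrays re-spaced, comments dropped) alongside the substantive changes: the new `[tags]` section, the vision flags under `[training]`, and `[judge]` becoming `[judge.options]`. As a quick illustrative check that the two spellings of the vLLM image limit are equivalent TOML (note that valid TOML inline tables use `=` rather than the `:` seen in the removed line):

```python
import tomllib

# Inline-table form (old style, corrected to valid TOML syntax)
inline = tomllib.loads('[vllm]\nlimit_mm_per_prompt = { image = 1 }\n')
# Table-header form (new style emitted by the 0.2.19 config)
table = tomllib.loads('[vllm]\n\n[vllm.limit_mm_per_prompt]\nimage = 1\n')

assert inline == table == {"vllm": {"limit_mm_per_prompt": {"image": 1}}}
```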
examples/qwen_vl/configs/crafter_vlm_sft_example.toml CHANGED

````diff
@@ -7,7 +7,7 @@ method = "sft"
 variety = "lora"
 
 [job]
-model = "Qwen/
+model = "Qwen/Qwen3-VL-8B-Instruct"  # or Qwen/Qwen3-VL-4B-Instruct
 # Dataset from collect_vision_traces.py → export_to_sft.py
 data = "traces/gpt5nano_vision/train.jsonl"
 
@@ -57,4 +57,3 @@ task = "crafter"
 modality = "vision"
 data_source = "collected_traces"
 model_family = "qwen_vl"
-
````