synth-ai 0.2.13.dev2__py3-none-any.whl → 0.2.16__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/README.md +1 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -11
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +190 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +3 -2
- examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
- examples/qwen_coder/configs/coder_lora_small.toml +2 -1
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +154 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +275 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +423 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
- examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +62 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +1 -1
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +37 -0
- examples/rl/configs/rl_from_base_qwen17.toml +76 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +22 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/sft/README.md +5 -5
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
- examples/sft/evaluate.py +4 -4
- examples/sft/export_dataset.py +7 -4
- examples/sft/generate_traces.py +2 -0
- examples/swe/task_app/README.md +1 -1
- examples/swe/task_app/grpo_swe_mini.py +1 -1
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
- examples/swe/task_app/hosted/policy_routes.py +0 -2
- examples/swe/task_app/hosted/rollout.py +2 -8
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/__init__.py +3 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +309 -14
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +75 -4
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +55 -3
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +114 -32
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +127 -27
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +156 -0
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/tests/__init__.py +2 -0
- examples/task_apps/enron/tests/integration/__init__.py +2 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +2 -0
- examples/task_apps/enron/tests/unit/__init__.py +2 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +2 -0
- examples/task_apps/pokemon_red/task_app.py +199 -6
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +2 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/tests/__init__.py +2 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +8 -4
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +258 -23
- examples/task_apps/verilog/tests/__init__.py +2 -0
- examples/task_apps/verilog/tests/integration/__init__.py +2 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +2 -0
- examples/task_apps/verilog/tests/unit/__init__.py +2 -0
- examples/vlm/README.md +3 -3
- examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
- examples/vlm/crafter_openai_vlm_agent.py +3 -5
- examples/vlm/filter_image_rows.py +1 -1
- examples/vlm/run_crafter_vlm_benchmark.py +2 -2
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +1 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
- examples/warming_up_to_rl/export_trace_sft.py +174 -60
- examples/warming_up_to_rl/groq_test.py +2 -0
- examples/warming_up_to_rl/readme.md +63 -132
- examples/warming_up_to_rl/run_fft_and_save.py +1 -1
- examples/warming_up_to_rl/run_local_rollout.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
- examples/warming_up_to_rl/run_rl_and_save.py +1 -1
- examples/warming_up_to_rl/run_rollout_remote.py +2 -0
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
- synth_ai/__init__.py +44 -30
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +145 -7
- synth_ai/api/train/__init__.py +13 -1
- synth_ai/api/train/cli.py +30 -7
- synth_ai/api/train/config_finder.py +18 -11
- synth_ai/api/train/env_resolver.py +13 -10
- synth_ai/cli/__init__.py +66 -49
- synth_ai/cli/_modal_wrapper.py +9 -6
- synth_ai/cli/_typer_patch.py +0 -2
- synth_ai/cli/_validate_task_app.py +22 -4
- synth_ai/cli/legacy_root_backup.py +3 -1
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/recent.py +1 -0
- synth_ai/cli/setup.py +266 -0
- synth_ai/cli/task_app_deploy.py +16 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +16 -0
- synth_ai/cli/task_app_serve.py +18 -0
- synth_ai/cli/task_apps.py +392 -141
- synth_ai/cli/train.py +18 -0
- synth_ai/cli/tui.py +62 -0
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +702 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +0 -1
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/crafter_classic/environment.py +1 -1
- synth_ai/environments/examples/verilog/engine.py +76 -10
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/evals/base.py +16 -5
- synth_ai/evals/client.py +1 -1
- synth_ai/inference/client.py +1 -1
- synth_ai/learning/client.py +1 -1
- synth_ai/learning/health.py +1 -1
- synth_ai/learning/jobs.py +1 -1
- synth_ai/learning/rl/client.py +1 -1
- synth_ai/learning/rl/env_keys.py +1 -1
- synth_ai/learning/rl/secrets.py +1 -1
- synth_ai/learning/sft/client.py +1 -1
- synth_ai/learning/sft/data.py +407 -4
- synth_ai/learning/validators.py +4 -1
- synth_ai/task/__init__.py +11 -1
- synth_ai/task/apps/__init__.py +5 -2
- synth_ai/task/config.py +259 -0
- synth_ai/task/contracts.py +15 -2
- synth_ai/task/rubrics/__init__.py +4 -2
- synth_ai/task/rubrics/loaders.py +27 -4
- synth_ai/task/rubrics/scoring.py +3 -0
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/trace_correlation_helpers.py +328 -0
- synth_ai/task/tracing_utils.py +14 -3
- synth_ai/task/validators.py +145 -2
- synth_ai/tracing_v3/config.py +15 -13
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +3 -1
- synth_ai/tracing_v3/decorators.py +10 -7
- synth_ai/tracing_v3/session_tracer.py +10 -0
- synth_ai/tracing_v3/turso/daemon.py +2 -2
- synth_ai/tracing_v3/turso/native_manager.py +108 -77
- synth_ai/tracing_v3/utils.py +1 -1
- synth_ai/tui/__init__.py +5 -0
- synth_ai/tui/__main__.py +13 -0
- synth_ai/tui/cli/__init__.py +1 -0
- synth_ai/tui/cli/query_experiments.py +164 -0
- synth_ai/tui/cli/query_experiments_v3.py +164 -0
- synth_ai/tui/dashboard.py +911 -0
- synth_ai/utils/__init__.py +101 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/cli.py +131 -0
- synth_ai/utils/env.py +287 -0
- synth_ai/utils/http.py +169 -0
- synth_ai/utils/modal.py +308 -0
- synth_ai/utils/process.py +212 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/v0/config/__init__.py +1 -5
- synth_ai/v0/config/base_url.py +1 -7
- synth_ai/v0/tracing/config.py +1 -1
- synth_ai/v0/tracing/decorators.py +1 -1
- synth_ai/v0/tracing/upload.py +1 -1
- synth_ai/v0/tracing_v1/config.py +1 -1
- synth_ai/v0/tracing_v1/decorators.py +1 -1
- synth_ai/v0/tracing_v1/upload.py +1 -1
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/RECORD +286 -135
- synth_ai/cli/man.py +0 -106
- synth_ai/compound/cais.py +0 -0
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/handshake.py +0 -109
- synth_ai/http.py +0 -26
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
examples/qwen_vl/RL_VISION_TESTING.md
@@ -0,0 +1,333 @@
# Vision RL Integration Testing

Complete integration tests for reinforcement learning with vision-language models using the Crafter task app.

## Overview

These tests verify the full vision RL pipeline:

1. **Task App**: Same Crafter task app used for SFT data collection (generates image observations)
2. **Model**: Qwen3-VL-4B (smaller, faster for testing)
3. **Policy**: Uses `image_only_mode=true` - the agent sees only images, no text observations
4. **Training**: Full RL (GRPO/GSPO) with a vision-capable model

## Files

### Configs
- `configs/crafter_rl_vision_qwen3vl4b.toml` - Full RL config for Qwen3-VL-4B with vision

### Tests
- `../../tests/integration/cli/test_cli_train_rl_vision.py` - Integration tests:
  - `test_cli_train_rl_vision_qwen3vl4b` - Full RL training test
  - `test_task_app_vision_support` - Task app vision capability test

## Quick Start

### 1. Prerequisites

```bash
# Required environment variables
export SYNTH_API_KEY="your-api-key"
export BACKEND_BASE_URL="https://agent-learning.onrender.com/api"  # or your backend
export ENVIRONMENT_API_KEY="your-modal-key"  # For Modal deployment

# Optional: for faster testing
export TASK_APP_WARMUP_TIMEOUT=300  # 5 min for vision models
export SYNTH_TRAIN_TEST_POLL_TIMEOUT=180
```

### 2. Run Tests

```bash
cd /path/to/synth-ai

# Run all vision RL tests
uv run pytest tests/integration/cli/test_cli_train_rl_vision.py -v -s

# Run a specific test
uv run pytest tests/integration/cli/test_cli_train_rl_vision.py::test_cli_train_rl_vision_qwen3vl4b -v -s

# Run by mark
uv run pytest -m "vision and slow" -v -s
```

### 3. Manual RL Training (without pytest)

```bash
# 1. Deploy the task app (if not already deployed)
uvx synth-ai task-app deploy grpo-crafter --name grpo-crafter-task-app

# 2. Get the task app URL (from deploy output)
export TASK_APP_URL="https://your-app.modal.run"

# 3. Run RL training
uvx synth-ai train \
  --type rl \
  --config examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml \
  --backend $BACKEND_BASE_URL \
  --task-url $TASK_APP_URL
```

## Configuration Details

### Model: Qwen3-VL-4B
```toml
[model]
base = "Qwen/Qwen3-VL-4B-Instruct"
trainer_mode = "lora"
supports_vision = true  # Enable vision support
```

### Vision-Specific Settings
```toml
[vllm]
limit_mm_per_prompt = { "image": 1 }  # Max 1 image per prompt

[rollout.policy_config]
use_vision = true       # Enable vision input
image_only_mode = true  # Use only images, no text observations
temperature = 0.6
max_tokens = 512

[training]
batch_size = 2  # Smaller for vision models (memory)
max_images_per_message = 1
supports_vision = true
```

### GPU Allocation (2x H200)

One GPU serves vLLM inference and one runs training, which accounts for the two H200s:

```toml
[topology]
gpus_for_vllm = 1      # Inference
gpus_for_training = 1  # Training
tensor_parallel = 1
```

## Test Details

### Test 1: Full RL Training
**Function:** `test_cli_train_rl_vision_qwen3vl4b`

**What it tests:**
1. Task app deployment
2. Task app warmup (health check)
3. RL job submission with the vision config
4. Job creation confirmation

**Expected output:**
```
✅ Vision RL job created: job-abc123
   Model: Qwen3-VL-4B
   Task App: https://your-app.modal.run
   Image Mode: image_only
```

**Runtime:** ~5-10 minutes (deploy + warmup + job submit)
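The shipped test body lives in `tests/integration/cli/test_cli_train_rl_vision.py`; as a rough sketch of the flow it exercises, assuming it shells out to the same CLI invocation shown above (the marks are documented below; the assertions here are hypothetical, not the real ones):

```python
# Sketch only - not the shipped test. It drives the CLI command from the
# "Manual RL Training" section and checks that a job id comes back.
import os
import subprocess

import pytest


@pytest.mark.slow
@pytest.mark.vision
@pytest.mark.integration
def test_cli_train_rl_vision_qwen3vl4b_sketch():
    result = subprocess.run(
        [
            "uvx", "synth-ai", "train",
            "--type", "rl",
            "--config", "examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml",
            "--backend", os.environ["BACKEND_BASE_URL"],
            "--task-url", os.environ["TASK_APP_URL"],
        ],
        capture_output=True,
        text=True,
        timeout=1800,
    )
    assert result.returncode == 0, result.stderr
    assert "job" in result.stdout.lower()  # job id is echoed on success
```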
### Test 2: Task App Vision Support
**Function:** `test_task_app_vision_support`

**What it tests:**
1. Task app can be deployed
2. Task app health endpoint responds
3. Task app accepts the vision policy config
4. A rollout request can be made with `use_vision=true` and `image_only_mode=true`

**Expected output:**
```
✅ Task app supports vision config
   Response keys: ['trajectory', 'metadata', ...]
```

**Runtime:** ~2-3 minutes (deploy + warmup + single rollout)

## Task App Details

The Crafter task app (`grpo-crafter-task-app`) provides:

### Environment
- **Crafter game** with visual observations
- Generates RGB images (64x64 by default, configurable)
- Text observations are also available (but ignored in `image_only_mode`)

### Policy (crafter-react)
- **Vision Detection:** Auto-detects vision models by name (e.g., "Qwen3-VL", "gpt-4o-mini")
- **Image Formatting:** Converts observations to OpenAI-style multimodal messages
- **Tool Calling:** Supports a structured action space via tools
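A minimal sketch of the image-formatting step described above - turning one RGB observation into an OpenAI-style multimodal message carrying a base64 data URL. This is illustrative, not the task app's actual policy code; `numpy` and `PIL` are assumed to be available:

```python
# Sketch: encode a Crafter RGB frame as a PNG data URL inside a
# multimodal user message (the format the policy sends to the model).
import base64
import io

import numpy as np
from PIL import Image


def frame_to_message(frame: np.ndarray, prompt: str = "Choose the next action.") -> dict:
    buf = io.BytesIO()
    Image.fromarray(frame).save(buf, format="PNG")
    data_url = "data:image/png;base64," + base64.b64encode(buf.getvalue()).decode()
    return {
        "role": "user",
        "content": [
            {"type": "text", "text": prompt},
            {"type": "image_url", "image_url": {"url": data_url}},
        ],
    }


# Example: a blank 64x64 RGB observation (the default size mentioned above).
message = frame_to_message(np.zeros((64, 64, 3), dtype=np.uint8))
```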
### Trace Format
- **Structured traces** with multimodal messages
- Images stored as base64 in the trace DB
- Compatible with `synth-ai filter` for SFT export

## Integration with SFT Pipeline

This RL setup uses the **same task app** as SFT data collection:

### SFT Data Collection
```bash
# Collect episodes with a gpt-4o-mini teacher
uvx synth-ai eval --config configs/eval_gpt4o_vision_proper.toml

# Export to an SFT dataset
uvx synth-ai filter --config configs/filter_vision_sft.toml
```

### RL Training
```bash
# Train the student model (Qwen3-VL-4B) with RL
uvx synth-ai train \
  --type rl \
  --config configs/crafter_rl_vision_qwen3vl4b.toml
```

**Benefits:**
1. **Consistency:** Same environment, same observations
2. **Curriculum:** SFT → RL progression
3. **Debugging:** SFT and RL traces can be compared in the same format

## Troubleshooting

### Task App Deployment Fails
```bash
# Check Modal auth
modal token set --token-id <id> --token-secret <secret>

# Check environment variables
echo $SYNTH_API_KEY
echo $ENVIRONMENT_API_KEY

# Try a manual deploy
uvx synth-ai task-app deploy grpo-crafter --name grpo-crafter-task-app
```

### Task App Won't Warm Up
```bash
# Increase the timeout
export TASK_APP_WARMUP_TIMEOUT=600  # 10 minutes

# Check task app logs in the Modal dashboard
# https://modal.com/apps

# Try the health check manually
curl https://your-app.modal.run/health
```
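If you would rather script the warmup wait than re-run curl by hand, a small standard-library poll loop works; the URL is a placeholder and the deadline reuses `TASK_APP_WARMUP_TIMEOUT`:

```python
# Sketch: poll /health until the task app warms up or the timeout elapses.
import os
import time
import urllib.request

url = "https://your-app.modal.run/health"  # placeholder: your deployed app
deadline = time.time() + int(os.environ.get("TASK_APP_WARMUP_TIMEOUT", "600"))
while time.time() < deadline:
    try:
        with urllib.request.urlopen(url, timeout=10) as resp:
            if resp.status == 200:
                print("task app is healthy")
                break
    except OSError as exc:  # URLError/HTTPError are OSError subclasses
        print(f"not ready yet: {exc}")
    time.sleep(15)
else:
    raise TimeoutError("task app did not become healthy before the deadline")
```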
### RL Job Submission Fails
```bash
# Check backend connectivity
curl $BACKEND_BASE_URL/health

# Verify the API key
curl -H "Authorization: Bearer $SYNTH_API_KEY" $BACKEND_BASE_URL/api/health

# Check the task app URL format
echo $TASK_APP_URL  # Should be https://...modal.run
```

### Vision Model OOM (Out of Memory)

Halving the batch size while doubling gradient accumulation keeps the effective batch size unchanged (1 × 4 = 2 × 2), so only peak memory drops:

```toml
# Reduce batch size in the config
[training]
batch_size = 1                   # Down from 2
gradient_accumulation_steps = 4  # Up from 2

# Reduce concurrent rollouts
[rollout]
max_concurrent_rollouts = 2  # Down from 4
```

### Images Not Appearing in Training
```bash
# Verify vision support is enabled
grep -A 5 "\[model\]" configs/crafter_rl_vision_qwen3vl4b.toml
# Should show: supports_vision = true

# Check the policy config
grep -A 10 "\[rollout.policy_config\]" configs/crafter_rl_vision_qwen3vl4b.toml
# Should show: use_vision = true, image_only_mode = true

# Verify the vLLM config
grep -A 3 "\[vllm\]" configs/crafter_rl_vision_qwen3vl4b.toml
# Should show: limit_mm_per_prompt = { "image": 1 }
```

## Performance Expectations

### Qwen3-VL-4B (2x H200)
- **Throughput:** ~2-4 episodes/min (with TP=1)
- **Memory:** ~40-60 GB GPU (model + images + gradients)
- **Iteration Time:** ~10-15 min (with 4 episodes, 10 steps each)

### Training Time Estimates
These follow directly from the ~10-15 minute iteration time above:
- **3 iterations (test):** ~30-45 minutes
- **10 iterations (short run):** ~2-3 hours
- **50 iterations (full run):** ~12-20 hours

## Next Steps

### 1. Baseline Evaluation
```bash
# Evaluate the untrained model
uvx synth-ai eval \
  --model Qwen/Qwen3-VL-4B-Instruct \
  --env crafter \
  --seeds 0,1,2,3,4 \
  --policy-config '{"use_vision": true, "image_only_mode": true}'
```

### 2. SFT Initialization (Optional)
```bash
# Train on teacher demonstrations first
uvx synth-ai train \
  --type sft \
  --model Qwen/Qwen3-VL-4B-Instruct \
  --data traces/gpt4o_vision/sft/train.jsonl
```

### 3. RL Fine-Tuning
```bash
# Run full RL training
uvx synth-ai train \
  --type rl \
  --config configs/crafter_rl_vision_qwen3vl4b.toml \
  --iterations 50
```

### 4. Eval Comparison
```bash
# Compare pre-RL vs post-RL
uvx synth-ai eval --model <rl-checkpoint> --seeds 0-9
```

## References

- **VLM SFT Pipeline:** `examples/qwen_vl/PIPELINE_RUN_LOG.txt`
- **Image Validation:** `examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md`
- **Task App Source:** `examples/task_apps/crafter/task_app/`
- **Policy Implementation:** `examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py`

## CI Integration

### Pytest Marks
```python
@pytest.mark.slow         # Takes >5 minutes
@pytest.mark.vision       # Requires vision model support
@pytest.mark.integration  # Full pipeline test
```
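For `-m vision` and friends to run without `PytestUnknownMarkWarning`, the marks need to be registered; one way is a `conftest.py` hook (a standard pytest API - the descriptions here are ours):

```python
# conftest.py - register the custom marks used by these tests.
def pytest_configure(config):
    for line in (
        "slow: takes >5 minutes",
        "vision: requires vision model support",
        "integration: full pipeline test",
    ):
        config.addinivalue_line("markers", line)
```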
### Run in CI
```bash
# Run all integration tests, including vision
pytest tests/integration/cli/ -m integration -v

# Run only vision tests
pytest -m vision -v

# Skip slow tests for PR checks
pytest -m "not slow" -v
```

---

**Status:** ✅ Integration tests ready. Task app and RL config validated for Qwen3-VL-4B with image-only observations.

examples/qwen_vl/SDK_VISION_INTEGRATION.md
@@ -0,0 +1,328 @@
# SDK Vision Support Integration

**Status**: ✅ Complete

## Overview

Added comprehensive vision/multimodal support to the synth-ai SDK's SFT data module and integrated it with the monorepo backend for consistent multimodal data handling across both codebases.

## Changes Made

### 1. **SDK Enhancement** (`synth-ai/synth_ai/learning/sft/data.py`)

Added vision-specific utilities to the SDK:

#### New Functions

1. **`has_image_content(content: SFTMessageContent) -> bool`**
   - Detects whether message content contains images
   - Supports the OpenAI multimodal format
   - Handles both `{"type": "image_url"}` and `{"type": "image"}` formats

2. **`message_has_image(message: SFTMessage) -> bool`**
   - Checks whether an SFTMessage contains image content
   - Convenience wrapper around `has_image_content`

3. **`example_has_image(example: SFTExample) -> bool`**
   - Checks whether any message in an SFTExample contains images
   - Used for filtering vision datasets

4. **`count_images_in_content(content: SFTMessageContent) -> int`**
   - Counts the image segments in message content
   - Useful for statistics and validation

5. **`extract_image_urls(content: SFTMessageContent) -> list[str]`**
   - Extracts all image URLs from message content
   - Supports http(s):// URLs and data:image/... base64
   - Returns a list of URL strings

6. **`validate_vision_example(example: SFTExample, *, require_images: bool = True) -> tuple[bool, str | None]`**
   - Comprehensive validation of vision SFT examples
   - Checks for image presence and URL validity
   - Returns an `(is_valid, error_message)` tuple
   - Logs warnings for suspicious URLs

7. **`iter_vision_examples(...) -> Iterator[SFTExample]`**
   - Specialized iterator for vision examples
   - Includes vision-specific validation
   - Option to require images or skip invalid examples
   - Useful for processing large JSONL files

#### Example Usage

```python
from synth_ai.learning.sft.data import (
    load_jsonl,
    example_has_image,
    validate_vision_example,
    extract_image_urls,
)

# Load and filter vision examples
examples = load_jsonl("vision_data.jsonl")
vision_examples = [ex for ex in examples if example_has_image(ex)]

# Validate each example
for ex in vision_examples:
    is_valid, error = validate_vision_example(ex)
    if not is_valid:
        print(f"Invalid: {error}")

    # Extract image URLs for inspection
    for msg in ex.messages:
        urls = extract_image_urls(msg.content)
        print(f"Images: {urls}")
```

### 2. **Backend Integration** (`monorepo/backend/.../training/sft/data.py`)

Updated the monorepo backend to use the SDK utilities:

#### Changes

1. **Added SDK imports with fallback**:
   ```python
   try:
       from synth_ai.learning.sft.data import (
           has_image_content as sdk_has_image_content,
           example_has_image as sdk_example_has_image,
           validate_vision_example as sdk_validate_vision_example,
           # ... more imports
       )
       SDK_VISION_AVAILABLE = True
   except ImportError:
       SDK_VISION_AVAILABLE = False
       logger.warning("synth_ai SDK not available - vision support will be limited")
   ```

2. **Updated the `SFTDataProcessor` docstring**:
   - Documents the integration with the SDK
   - Shows an OpenAI multimodal format example
   - Explains the fallback behavior

3. **Enhanced the `_vision_message_has_image()` method**:
   - Uses the SDK's `has_image_content()` when available
   - Falls back to a local implementation if the SDK is unavailable
   - Ensures consistency between SDK and backend

4. **Enhanced the `_validate_vision_examples()` method**:
   - Uses the SDK's `coerce_example()` and `validate_vision_example()` for the messages format
   - Provides comprehensive validation with detailed error messages
   - Falls back gracefully if SDK validation fails
   - Maintains backward compatibility with non-messages formats
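A sketch of the SDK-first/fallback shape that `_vision_message_has_image()` follows, reusing the aliased imports and `SDK_VISION_AVAILABLE` flag from the snippet above; the real method lives on `SFTDataProcessor` and may differ in detail:

```python
def _vision_message_has_image(message: dict) -> bool:
    # Prefer the SDK implementation when it imported successfully.
    if SDK_VISION_AVAILABLE:
        return sdk_has_image_content(message.get("content"))
    # Local fallback: scan multimodal content parts for image segments
    # (both {"type": "image_url"} and {"type": "image"}, as documented above).
    content = message.get("content")
    if isinstance(content, list):
        return any(
            isinstance(part, dict) and part.get("type") in ("image_url", "image")
            for part in content
        )
    return False
```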
## Supported Data Formats

### OpenAI Multimodal Format (Recommended)

```json
{
  "messages": [
    {
      "role": "system",
      "content": "You are a helpful assistant."
    },
    {
      "role": "user",
      "content": [
        {"type": "text", "text": "What's in this image?"},
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBORw0KGgo..."}}
      ]
    },
    {
      "role": "assistant",
      "content": "I see a cat sitting on a couch."
    }
  ],
  "metadata": {
    "session_id": "ep001",
    "has_image": true
  }
}
```

### Alternative Formats (Also Supported)

**Legacy `images` field**:
```json
{
  "messages": [...],
  "images": ["/path/to/image.jpg"],
  "metadata": {}
}
```

**Single `image` field**:
```json
{
  "messages": [...],
  "image": "https://example.com/image.jpg",
  "metadata": {}
}
```

## Image URL Formats

Supported image URL formats:

1. **HTTP(S) URLs**: `https://example.com/image.jpg`
2. **Data URLs (base64)**: `data:image/png;base64,iVBORw0KGgo...`
3. **Local file paths**: `/path/to/image.jpg` (for local training only)
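Converting format 3 into format 2 takes only the standard library; a hedged helper sketch (the function name and PNG fallback are ours, not SDK API):

```python
# Sketch: turn a local image path into a base64 data URL.
import base64
import mimetypes
from pathlib import Path


def path_to_data_url(path: str) -> str:
    # Guess the MIME type from the extension; fall back to PNG.
    mime, _ = mimetypes.guess_type(path)
    payload = base64.b64encode(Path(path).read_bytes()).decode()
    return f"data:{mime or 'image/png'};base64,{payload}"


# path_to_data_url("/path/to/image.jpg") -> "data:image/jpeg;base64,..."
```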
## Validation Rules

The SDK validates:

1. **Image presence**: At least one message must contain an image (when `require_images=True`)
2. **URL format**: All image URLs must be non-empty strings
3. **URL scheme**: URLs should start with `http://`, `https://`, or `data:image/`
   - Warnings are logged for non-standard formats
4. **Message structure**: Messages must follow the OpenAI format
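A sketch mirroring rules 2-3 for a single URL - illustrative only, not the SDK's `validate_vision_example` implementation:

```python
import logging

logger = logging.getLogger(__name__)


def check_image_url(url: object) -> bool:
    if not isinstance(url, str) or not url:
        return False  # rule 2: must be a non-empty string
    if not url.startswith(("http://", "https://", "data:image/")):
        logger.warning("non-standard image URL scheme: %.40s", url)  # rule 3
    return True
```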
## Benefits

### 1. **Consistency**
- Single source of truth for vision data validation
- Both SDK and backend use the same logic
- Reduces bugs and maintenance burden

### 2. **Type Safety**
- Strong typing with dataclasses
- Clear SFTMessage and SFTExample structures
- IDE autocomplete and type checking

### 3. **Error Handling**
- Comprehensive validation with detailed error messages
- Graceful fallbacks if the SDK is unavailable
- Helpful warnings for edge cases

### 4. **OpenAI Compatibility**
- Matches OpenAI's fine-tuning format exactly
- Data can be used with OpenAI or local models
- Easy migration between platforms

### 5. **Tool Call Support**
- The SDK already handles tool calls and tool definitions
- Ready for complex agentic workflows
- Supports reasoning blocks (`<think>` tags) if needed

## Testing

### Quick SDK Test

```python
# Test in the synth-ai repo
from synth_ai.learning.sft.data import has_image_content, validate_vision_example, coerce_example

# Test multimodal message detection
content = [
    {"type": "text", "text": "What's this?"},
    {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc123"}},
]
assert has_image_content(content)

# Test validation
example_data = {
    "messages": [
        {"role": "user", "content": content},
        {"role": "assistant", "content": "A test image"},
    ]
}
example = coerce_example(example_data)
is_valid, error = validate_vision_example(example)
assert is_valid, error
print("✓ SDK vision utilities working correctly!")
```

### Integration Test

```python
# Test in the monorepo backend
from backend.app.routes.simple_training.training.sft.data import SFTDataProcessor

processor = SFTDataProcessor()
test_data = [{
    "messages": [
        {"role": "user", "content": [
            {"type": "text", "text": "Describe this."},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}},
        ]},
        {"role": "assistant", "content": "Description"},
    ]
}]

validated = processor._validate_vision_examples(test_data)
assert len(validated) == 1
print("✓ Backend SDK integration working!")
```

## Future Enhancements

### Potential Additions

1. **Image preprocessing utilities**
   - Resize images to model requirements
   - Validate image dimensions
   - Convert between formats (JPEG ↔ PNG)

2. **Base64 encoding helpers**
   - Convert file paths to data URLs
   - Batch-encode images for JSONL
   - Memory-efficient streaming

3. **Statistics and analytics**
   - Count images per example
   - Measure average image sizes
   - Detect corrupted or invalid images

4. **Dataset transformation**
   - Convert between formats
   - Augment with additional images
   - Filter by image properties

## Migration Guide

### For Existing Backend Code

If you have existing vision validation code:

```python
# Before (manual validation)
def has_images(messages):
    for msg in messages:
        content = msg.get("content")
        if isinstance(content, list):
            for part in content:
                if part.get("type") == "image_url":
                    return True
    return False

# After (use the SDK)
from synth_ai.learning.sft.data import has_image_content

def has_images(messages):
    return any(has_image_content(msg.get("content")) for msg in messages)
```

### For Existing SDK Code

No changes needed! The SDK already handles OpenAI message formats correctly. The vision utilities are additive and don't break existing functionality.

## Documentation

- **SDK docs**: See the `synth_ai/learning/sft/data.py` docstrings
- **Backend docs**: See the `backend/app/routes/simple_training/training/sft/data.py` class docstring
- **Examples**: See `synth-ai/examples/qwen_vl/` for vision-specific examples

## Related Files

- SDK: `synth-ai/synth_ai/learning/sft/data.py`
- Backend: `monorepo/backend/app/routes/simple_training/training/sft/data.py`
- Examples: `synth-ai/examples/qwen_vl/`
- Pipeline guide: `synth-ai/examples/qwen_vl/NEXT_STEPS_2B.md`

---

✅ **SDK vision support is now production-ready for both synth-ai and monorepo!**