synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/README.md +1 -0
- examples/analyze_semantic_words.sh +2 -2
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
- examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -2
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +6 -6
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +79 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +6 -6
- examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
- examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
- examples/sft/evaluate.py +2 -4
- examples/sft/export_dataset.py +7 -4
- examples/swe/task_app/README.md +33 -3
- examples/swe/task_app/grpo_swe_mini.py +4 -1
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
- examples/swe/task_app/hosted/inference/openai_client.py +4 -4
- examples/swe/task_app/hosted/policy_routes.py +0 -2
- examples/swe/task_app/hosted/rollout.py +0 -8
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +36 -5
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/README.md +3 -3
- examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
- examples/vlm/crafter_openai_vlm_agent.py +3 -5
- examples/vlm/filter_image_rows.py +1 -1
- examples/vlm/run_crafter_vlm_benchmark.py +2 -2
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +1 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
- examples/warming_up_to_rl/export_trace_sft.py +174 -60
- examples/warming_up_to_rl/readme.md +63 -132
- examples/warming_up_to_rl/run_fft_and_save.py +1 -1
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/run_rl_and_save.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
- synth_ai/__init__.py +44 -30
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +144 -7
- synth_ai/api/train/__init__.py +13 -1
- synth_ai/api/train/builders.py +9 -3
- synth_ai/api/train/cli.py +155 -17
- synth_ai/api/train/config_finder.py +18 -11
- synth_ai/api/train/configs/__init__.py +8 -1
- synth_ai/api/train/configs/rl.py +32 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/api/train/env_resolver.py +13 -10
- synth_ai/auth/credentials.py +119 -0
- synth_ai/cli/__init__.py +61 -69
- synth_ai/cli/_modal_wrapper.py +7 -5
- synth_ai/cli/_typer_patch.py +0 -2
- synth_ai/cli/_validate_task_app.py +22 -4
- synth_ai/cli/commands/__init__.py +17 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/deploy/__init__.py +23 -0
- synth_ai/cli/commands/deploy/core.py +614 -0
- synth_ai/cli/commands/deploy/errors.py +72 -0
- synth_ai/cli/commands/deploy/validation.py +11 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1109 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +388 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +73 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +199 -0
- synth_ai/cli/commands/train/judge_validation.py +304 -0
- synth_ai/cli/commands/train/validation.py +443 -0
- synth_ai/cli/demo.py +2 -162
- synth_ai/cli/deploy/__init__.py +28 -0
- synth_ai/cli/deploy/core.py +5 -0
- synth_ai/cli/deploy/errors.py +23 -0
- synth_ai/cli/deploy/validation.py +5 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +3 -1
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/recent.py +2 -1
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +21 -0
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +110 -1499
- synth_ai/cli/traces.py +1 -1
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +1 -1
- synth_ai/cli/watch.py +1 -1
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +702 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +0 -1
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/evals/base.py +16 -5
- synth_ai/evals/client.py +1 -1
- synth_ai/http.py +8 -22
- synth_ai/inference/client.py +1 -1
- synth_ai/judge_schemas.py +4 -5
- synth_ai/learning/client.py +1 -1
- synth_ai/learning/health.py +1 -1
- synth_ai/learning/jobs.py +1 -1
- synth_ai/learning/rl/client.py +4 -2
- synth_ai/learning/rl/env_keys.py +1 -1
- synth_ai/learning/rl/secrets.py +1 -1
- synth_ai/learning/sft/client.py +1 -1
- synth_ai/learning/sft/data.py +407 -4
- synth_ai/learning/validators.py +4 -1
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +469 -0
- synth_ai/streaming/streamer.py +301 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/apps/__init__.py +4 -2
- synth_ai/task/config.py +6 -4
- synth_ai/task/rubrics/__init__.py +1 -2
- synth_ai/task/rubrics/loaders.py +14 -10
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/trace_correlation_helpers.py +24 -11
- synth_ai/task/tracing_utils.py +14 -3
- synth_ai/task/validators.py +0 -1
- synth_ai/tracing_v3/abstractions.py +3 -3
- synth_ai/tracing_v3/config.py +15 -13
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +3 -1
- synth_ai/tracing_v3/decorators.py +10 -7
- synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/session_tracer.py +7 -7
- synth_ai/tracing_v3/storage/base.py +29 -29
- synth_ai/tracing_v3/storage/config.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +8 -9
- synth_ai/tracing_v3/turso/native_manager.py +80 -72
- synth_ai/tracing_v3/utils.py +2 -2
- synth_ai/utils/__init__.py +101 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/cli.py +131 -0
- synth_ai/utils/env.py +294 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/modal.py +308 -0
- synth_ai/utils/process.py +212 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/v0/config/__init__.py +1 -5
- synth_ai/v0/config/base_url.py +1 -7
- synth_ai/v0/tracing/config.py +1 -1
- synth_ai/v0/tracing/decorators.py +1 -1
- synth_ai/v0/tracing/upload.py +1 -1
- synth_ai/v0/tracing_v1/config.py +1 -1
- synth_ai/v0/tracing_v1/decorators.py +1 -1
- synth_ai/v0/tracing_v1/upload.py +1 -1
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
- synth_ai/cli/man.py +0 -106
- synth_ai/cli/tui.py +0 -57
- synth_ai/compound/cais.py +0 -0
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/handshake.py +0 -109
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -906
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,300 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Crafter agent using Qwen-VL models via synth-ai's hosted inference.
|
|
4
|
+
|
|
5
|
+
This demonstrates vision-language models (Qwen3-VL family) playing Crafter
|
|
6
|
+
with image observations. The CrafterPolicy automatically detects vision capability
|
|
7
|
+
from the model name and includes base64-encoded PNG frames in the prompt.
|
|
8
|
+
|
|
9
|
+
Requirements:
|
|
10
|
+
- `SYNTH_API_KEY` environment variable (for synth-ai hosted inference)
|
|
11
|
+
- synth-ai package with Crafter task app dependencies
|
|
12
|
+
|
|
13
|
+
Usage:
|
|
14
|
+
uv run python examples/qwen_vl/crafter_qwen_vl_agent.py \
|
|
15
|
+
--model Qwen/Qwen3-VL-8B-Instruct --seeds 10 --steps 20
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import argparse
|
|
21
|
+
import asyncio
|
|
22
|
+
import base64
|
|
23
|
+
import json
|
|
24
|
+
import os
|
|
25
|
+
from contextlib import suppress
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
from typing import Any
|
|
28
|
+
from uuid import uuid4
|
|
29
|
+
|
|
30
|
+
from examples.task_apps.crafter.task_app.synth_envs_hosted.envs.crafter.environment import (
|
|
31
|
+
CrafterEnvironmentWrapper,
|
|
32
|
+
)
|
|
33
|
+
from examples.task_apps.crafter.task_app.synth_envs_hosted.envs.crafter.policy import CrafterPolicy
|
|
34
|
+
from synth_ai.environments.examples.crafter_classic.environment import CrafterClassicEnvironment
|
|
35
|
+
from synth_ai.environments.examples.crafter_classic.taskset import (
|
|
36
|
+
CrafterTaskInstance,
|
|
37
|
+
CrafterTaskInstanceMetadata,
|
|
38
|
+
)
|
|
39
|
+
from synth_ai.environments.tasks.core import Impetus, Intent
|
|
40
|
+
|
|
41
|
+
# Import synth-ai inference client
|
|
42
|
+
try:
|
|
43
|
+
from synth_ai.inference.client import InferenceClient
|
|
44
|
+
except ImportError:
|
|
45
|
+
print("Error: synth-ai inference client not found. Make sure synth-ai is installed.")
|
|
46
|
+
raise
|
|
47
|
+
|
|
48
|
+
DEFAULT_OUTPUT = Path("examples/qwen_vl/temp")
|
|
49
|
+
FRAME_SUBDIR = "qwen_vl_frames"
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def _default_backend_base_url() -> str:
|
|
53
|
+
raw = os.getenv("BACKEND_BASE_URL", "https://agent-learning.onrender.com/api").strip()
|
|
54
|
+
return raw if raw.endswith("/api") else f"{raw}/api"
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class EpisodeResult:
    """Running tally of what happened during one Crafter episode.

    Attributes are plain counters that the episode loop updates directly
    (``steps_taken``, ``tool_calls``) or through ``record_observation``
    (``achievements``, ``total_reward``).
    """

    def __init__(self, seed: int) -> None:
        """Start an empty tally for the episode identified by ``seed``."""
        self.seed = seed
        self.steps_taken: int = 0
        self.achievements: set[str] = set()
        self.total_reward: float = 0.0
        self.tool_calls: int = 0

    def record_observation(self, observation: dict[str, Any]) -> None:
        """Fold one observation packet into the tally.

        Reads the nested ``observation["observation"]`` dict. Every truthy
        entry of its ``achievements_status`` mapping is added (by name) to
        ``achievements``, and a numeric ``reward_last_step`` is added to
        ``total_reward``. Packets that are not dicts, or that lack a dict
        payload, are ignored.
        """
        if not isinstance(observation, dict):
            return
        payload = observation.get("observation")
        if not isinstance(payload, dict):
            return

        status = payload.get("achievements_status")
        if isinstance(status, dict):
            self.achievements.update(
                str(label) for label, is_unlocked in status.items() if is_unlocked
            )

        last_reward = payload.get("reward_last_step")
        if isinstance(last_reward, (int, float)):
            self.total_reward += float(last_reward)
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def _ensure_synth_client() -> InferenceClient:
    """Build the synth-ai inference client from environment configuration.

    The API key comes from ``SYNTH_API_KEY``. The base URL comes from
    ``SYNTH_BASE_URL`` when that variable is set, otherwise from
    ``_default_backend_base_url()``.

    Raises:
        RuntimeError: if ``SYNTH_API_KEY`` is unset or empty.
    """
    key = os.getenv("SYNTH_API_KEY")
    if key:
        fallback_url = _default_backend_base_url()
        endpoint = os.getenv("SYNTH_BASE_URL", fallback_url)
        return InferenceClient(base_url=endpoint, api_key=key)

    raise RuntimeError(
        "SYNTH_API_KEY must be set for synth-ai hosted inference. "
        "Get your key from https://synth-ai.com"
    )
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
def _build_task_instance(seed: int) -> CrafterTaskInstance:
    """Assemble a reproducible Crafter task instance for ``seed``.

    The instance gets a fresh UUID, fixed instructions and rubric, metadata
    with ``difficulty="custom"`` and zeroed radius counts, and a ``config``
    attribute carrying the seed, an episode length of 256 and a 64x64 area.
    """
    task_metadata = CrafterTaskInstanceMetadata(
        difficulty="custom",
        seed=seed,
        num_trees_radius=0,
        num_cows_radius=0,
        num_hostiles_radius=0,
    )
    task = CrafterTaskInstance(
        id=uuid4(),
        impetus=Impetus(instructions="Explore, survive, and unlock achievements."),
        intent=Intent(
            rubric={"goal": "Maximise Crafter achievements."},
            gold_trajectories=None,
            gold_state_diff={},
        ),
        metadata=task_metadata,
        is_reproducible=True,
        initial_engine_snapshot=None,
    )
    # ``config`` is attached dynamically rather than passed to the constructor.
    engine_config = {"seed": seed, "length": 256, "area": [64, 64]}
    setattr(task, "config", engine_config)
    return task
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _decode_and_save_image(observation: dict[str, Any], path: Path) -> None:
|
|
119
|
+
"""Extract and save PNG frame from observation."""
|
|
120
|
+
obs = observation.get("observation") if isinstance(observation, dict) else None
|
|
121
|
+
if not isinstance(obs, dict):
|
|
122
|
+
return
|
|
123
|
+
base64_data = obs.get("observation_image_base64")
|
|
124
|
+
if not isinstance(base64_data, str) or not base64_data:
|
|
125
|
+
return
|
|
126
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
127
|
+
with suppress(Exception):
|
|
128
|
+
path.write_bytes(base64.b64decode(base64_data))
|
|
129
|
+
|
|
130
|
+
|
|
131
|
+
async def _run_episode(
    *,
    seed: int,
    client: InferenceClient,
    model: str,
    max_steps: int,
    output_dir: Path,
    temperature: float,
) -> EpisodeResult:
    """Run a single Crafter episode with Qwen-VL.

    Each step asks the policy to prepare an inference request, sends it to the
    hosted inference client, parses the returned tool calls, applies them to
    the environment, and saves the resulting frame under
    ``output_dir / FRAME_SUBDIR / seed_XXXX``.

    The episode stops early when the observation payload is not a dict, when
    the policy produces no inference request, when the model returns no tool
    calls, or when the environment reports ``done``.

    Args:
        seed: Seed used for the task instance and the environment wrapper.
        client: Inference client used for chat completions.
        model: Model name passed to both the policy and the inference call.
        max_steps: Upper bound on the number of environment steps.
        output_dir: Root directory under which frames are written.
        temperature: Sampling temperature for the policy and inference call.

    Returns:
        The ``EpisodeResult`` tally for this episode.

    Raises:
        RuntimeError: if the environment returns a non-dict step response.
    """
    task_instance = _build_task_instance(seed)
    env = CrafterClassicEnvironment(task_instance)
    wrapper = CrafterEnvironmentWrapper(env, seed=seed)

    # Policy will auto-detect vision from model name (qwen-vl and qwen3-vl tokens)
    policy = CrafterPolicy(inference_url="synth://inference", model=model)
    await policy.initialize({
        "use_tools": True,
        "model": model,
        "temperature": temperature,
        "max_tokens": 512,
    })

    episode_result = EpisodeResult(seed=seed)

    observation_packet = await wrapper.initialize()
    # Once the wrapper is initialized, always terminate it, even when a step
    # raises; previously an exception skipped ``terminate()`` entirely.
    try:
        episode_result.record_observation(observation_packet)

        frames_root = output_dir / FRAME_SUBDIR / f"seed_{seed:04d}"
        _decode_and_save_image(observation_packet, frames_root / "step_000.png")

        for step_idx in range(max_steps):
            obs_dict = observation_packet.get("observation")
            if not isinstance(obs_dict, dict):
                break

            # Format observation text
            obs_text = policy._format_observation_for_llm(observation_packet)  # noqa: SLF001

            # The policy prepares the inference request internally; only the
            # metadata is needed here, so its own tool-call list is discarded.
            _, meta = await policy.step(
                observation_text=obs_text,
                metadata={"raw_observation": observation_packet},
            )
            if "inference_request" not in meta:
                break

            episode_result.steps_taken += 1
            inference_request = meta["inference_request"]

            # Call synth-ai hosted inference
            response = await client.create_chat_completion(
                model=model,
                messages=inference_request["messages"],
                temperature=temperature,
                max_tokens=512,
                tools=inference_request.get("tools"),
            )

            # Parse tool calls from response
            assistant_tool_calls = CrafterPolicy.parse_response_to_tool_calls(
                response,
                use_tools=policy.use_tools,
            )
            if not assistant_tool_calls:
                print(
                    f"Seed {seed}: no tool calls returned by model; ending episode early at step {step_idx}."
                )
                break

            episode_result.tool_calls += len(assistant_tool_calls)

            # Extract assistant message. ``choices`` may be missing, None or an
            # empty list, and ``message`` may be None; fall back to an empty
            # message instead of raising IndexError/AttributeError.
            choices = response.get("choices") or [{}]
            assistant_message = (choices[0] or {}).get("message") or {}
            assistant_text = assistant_message.get("content")

            # Execute action in environment
            env_response = await wrapper.step(assistant_tool_calls)
            if not isinstance(env_response, dict):
                raise RuntimeError(
                    f"Unexpected environment response type: {type(env_response)!r}"
                )
            episode_result.record_observation(env_response)

            # Update policy history
            policy._append_assistant_turn(  # noqa: SLF001
                assistant_text,
                assistant_tool_calls,
                env_response,
            )

            # Save frame
            frame_path = frames_root / f"step_{step_idx + 1:03d}.png"
            _decode_and_save_image(env_response, frame_path)

            if env_response.get("done"):
                break
            observation_packet = env_response
    finally:
        await wrapper.terminate()

    return episode_result
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
async def main() -> None:
    """Parse CLI options, run one episode per seed, and report a summary.

    Seeds ``0 .. --seeds - 1`` are run sequentially. A per-seed line is printed
    after each episode, and an aggregate summary is both printed and written to
    ``<output-dir>/qwen_vl_summary.json``.
    """
    cli = argparse.ArgumentParser(description=__doc__)
    cli.add_argument(
        "--model",
        default="Qwen/Qwen3-VL-8B-Instruct",
        help="Qwen-VL model name (e.g., Qwen/Qwen3-VL-2B-Instruct, Qwen/Qwen3-VL-8B-Instruct)",
    )
    cli.add_argument("--seeds", type=int, default=10, help="Number of random seeds to evaluate")
    cli.add_argument("--steps", type=int, default=20, help="Max steps per seed")
    cli.add_argument("--temperature", type=float, default=0.7, help="Sampling temperature")
    cli.add_argument(
        "--output-dir",
        type=Path,
        default=DEFAULT_OUTPUT,
        help=f"Directory for saved frames and summaries (default: {DEFAULT_OUTPUT})",
    )
    args = cli.parse_args()

    client = _ensure_synth_client()
    results: list[EpisodeResult] = []

    seed_range = range(args.seeds)
    print(f"Running {len(seed_range)} Crafter episodes with model={args.model}")
    print("Using synth-ai hosted inference\n")

    for seed in seed_range:
        outcome = await _run_episode(
            seed=seed,
            client=client,
            model=args.model,
            max_steps=args.steps,
            output_dir=args.output_dir,
            temperature=args.temperature,
        )
        results.append(outcome)
        print(
            f"Seed {seed:02d}: steps={outcome.steps_taken}, "
            f"achievements={len(outcome.achievements)}, "
            f"tool_calls={outcome.tool_calls}, reward≈{outcome.total_reward:.3f}"
        )

    # Guard the averages against a run with zero episodes.
    divisor = max(len(results), 1)
    total_steps = sum(item.steps_taken for item in results)
    total_achievements = sum(len(item.achievements) for item in results)
    summary = {
        "model": args.model,
        "provider": "synth-ai",
        "episodes": len(results),
        "mean_steps": round(total_steps / divisor, 2),
        "mean_achievements": round(total_achievements / divisor, 2),
        "total_tool_calls": sum(item.tool_calls for item in results),
        "output_dir": str(args.output_dir / FRAME_SUBDIR),
    }
    rendered = json.dumps(summary, indent=2)

    args.output_dir.mkdir(parents=True, exist_ok=True)
    summary_path = args.output_dir / "qwen_vl_summary.json"
    summary_path.write_text(rendered, encoding="utf-8")

    print("\nSummary")
    print("-------")
    print(rendered)
    print(f"\nFrames saved in: {summary['output_dir']}")
|
|
297
|
+
|
|
298
|
+
|
|
299
|
+
if __name__ == "__main__":
|
|
300
|
+
asyncio.run(main())
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
#!/bin/bash
# Compare Qwen-VL (via synth) vs gpt-5-nano (via OpenAI) on Crafter
#
# Requires OPENAI_API_KEY and SYNTH_API_KEY in the environment and `uv` on PATH.
# All paths below are relative, so run this from the directory that contains
# the examples/ tree.

set -euo pipefail

SEEDS=10
STEPS=20
OUTPUT_DIR="examples/qwen_vl/temp/comparison"

echo "======================================"
echo "Vision Model Comparison on Crafter"
echo "======================================"
echo ""
echo "Running $SEEDS episodes, $STEPS steps each"
echo ""

# Check API keys (":-" keeps the test valid under `set -u`; errors go to stderr)
if [[ -z "${OPENAI_API_KEY:-}" ]]; then
  echo "Error: OPENAI_API_KEY not set" >&2
  exit 1
fi

if [[ -z "${SYNTH_API_KEY:-}" ]]; then
  echo "Error: SYNTH_API_KEY not set" >&2
  exit 1
fi

# Run gpt-5-nano
echo "======================================"
echo "1. Running gpt-5-nano (OpenAI)"
echo "======================================"
uv run python examples/qwen_vl/crafter_gpt5nano_agent.py \
    --model gpt-5-nano \
    --seeds "$SEEDS" \
    --steps "$STEPS" \
    --output-dir "$OUTPUT_DIR/gpt5nano"

echo ""
echo "======================================"
echo "2. Running Qwen3-VL-8B (synth-ai)"
echo "======================================"
uv run python examples/qwen_vl/crafter_qwen_vl_agent.py \
    --model Qwen/Qwen3-VL-8B-Instruct \
    --seeds "$SEEDS" \
    --steps "$STEPS" \
    --output-dir "$OUTPUT_DIR/qwen3vl"

echo ""
echo "======================================"
echo "Results Summary"
echo "======================================"
echo ""
# Pretty-print with the same interpreter used for the runs: a bare `python` may
# not exist on PATH, and passing the file directly avoids a `cat |` pipeline
# whose read failure would be masked.
echo "gpt-5-nano (OpenAI):"
uv run python -m json.tool "$OUTPUT_DIR/gpt5nano/gpt5nano_summary.json"
echo ""
echo "Qwen3-VL-8B (synth-ai):"
uv run python -m json.tool "$OUTPUT_DIR/qwen3vl/qwen_vl_summary.json"
echo ""
echo "Frames saved in:"
echo " - $OUTPUT_DIR/gpt5nano/gpt5nano_frames/"
echo " - $OUTPUT_DIR/qwen3vl/qwen_vl_frames/"
|
|
@@ -0,0 +1,175 @@
|
|
|
1
|
+
#!/bin/bash
# Complete pipeline: Collect vision traces → Filter → Train SFT
# Uses synth-ai CLI tools for data collection and processing
#
# Optional environment overrides:
#   SYNTH_DIR     root of the synth-ai checkout
#                 (default: two directory levels above this script)
#   MONOREPO_DIR  checkout used by the optional training step
#                 (default: a "monorepo" directory next to SYNTH_DIR)

set -e

SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
# Allow callers to override root paths, otherwise derive them relative to this script.
SYNTH_DIR="${SYNTH_DIR:-$(cd "$SCRIPT_DIR/../.." && pwd)}"

# Validate SYNTH_DIR *before* anything cd's into it: under `set -e` a failing
# `cd` inside a command substitution aborts the script, which would hide the
# explanatory message below behind a bare "No such file or directory".
if [ ! -d "$SYNTH_DIR" ]; then
  echo "Error: synth-ai repository not found at: $SYNTH_DIR" >&2
  exit 1
fi
# Normalise to an absolute path: the script changes directory later on and
# still builds dataset paths from SYNTH_DIR afterwards.
SYNTH_DIR="$(cd "$SYNTH_DIR" && pwd)"

DEFAULT_MONOREPO_DIR="$(cd "$SYNTH_DIR/.." && pwd)/monorepo"
MONOREPO_DIR="${MONOREPO_DIR:-$DEFAULT_MONOREPO_DIR}"

if [ -d "$MONOREPO_DIR" ]; then
  # Resolve a caller-supplied relative path now, against the invocation
  # directory, so it keeps pointing at the same place after later `cd`s.
  MONOREPO_DIR="$(cd "$MONOREPO_DIR" && pwd)"
else
  # Not fatal: the monorepo is only needed for the optional training step.
  echo "Warning: MONOREPO_DIR not found at: $MONOREPO_DIR" >&2
  echo " Set MONOREPO_DIR to a valid path if you plan to run the optional training step." >&2
fi
|
|
22
|
+
|
|
23
|
+
# Configuration: which model/provider to roll out, how many episodes, and
# where (relative to the working directory) the traces are written.
MODEL="gpt-5-nano"
PROVIDER="openai"
NUM_EPISODES=100
OUTPUT_DIR="traces/gpt5nano_vision"

# Banner summarising the run parameters.
cat <<EOF
======================================
Vision SFT Pipeline for Crafter
======================================

Model: $MODEL
Provider: $PROVIDER
Episodes: $NUM_EPISODES
Output: $OUTPUT_DIR

EOF

# Check API keys: only the key belonging to the selected provider is required.
# Providers other than "openai" and "synth" are not checked at all.
case "$PROVIDER" in
  openai)
    if [[ -z "$OPENAI_API_KEY" ]]; then
      echo "Error: OPENAI_API_KEY not set"
      exit 1
    fi
    echo "✓ OpenAI API key found"
    ;;
  synth)
    if [[ -z "$SYNTH_API_KEY" ]]; then
      echo "Error: SYNTH_API_KEY not set"
      exit 1
    fi
    echo "✓ Synth API key found"
    ;;
esac

# Fall back to the default backend URL when the caller did not provide one.
[[ -n "$BACKEND_BASE_URL" ]] || {
  echo "Warning: BACKEND_BASE_URL not set, using default"
  export BACKEND_BASE_URL="https://synth-backend-dev-docker.onrender.com/api"
}

printf '\n'
|
|
60
|
+
|
|
61
|
+
# Step 1: Collect traces
# Runs `synth-ai eval` from the synth-ai checkout so the relative config and
# output paths below resolve against SYNTH_DIR.
echo "======================================"
echo "STEP 1: Collect Vision Traces"
echo "======================================"
echo ""
echo "Running $NUM_EPISODES episodes with $MODEL..."
echo "This will take ~30-60 minutes"
echo ""

cd "$SYNTH_DIR" || {
  echo "Error: cannot change directory to: $SYNTH_DIR" >&2
  exit 1
}

# Prefer a provider/model-specific eval config and fall back to the gpt5nano
# config ONLY when that file does not exist. Deciding up front (instead of
# retrying after a failed run) means a genuine evaluation failure stops the
# pipeline rather than silently starting a second full rollout with a
# different config. Slashes in the model id are mapped to underscores so the
# id can be embedded in a file name.
eval_config="examples/qwen_vl/configs/eval_${PROVIDER}_${MODEL//\//_}_vision.toml"
if [ ! -f "$eval_config" ]; then
  eval_config="examples/qwen_vl/configs/eval_gpt5nano_vision.toml"
fi

uvx synth-ai eval \
  --config "$eval_config" \
  --output-dir "$OUTPUT_DIR"

echo ""
echo "✅ Trace collection complete!"
echo ""
|
|
85
|
+
|
|
86
|
+
# Step 2: Filter and export to SFT format
# Reads the rollout database written under OUTPUT_DIR and writes the filtered
# dataset into OUTPUT_DIR/sft.
sft_dir="$OUTPUT_DIR/sft"
stats_file="$sft_dir/filter_stats.json"

printf '%s\n' \
  "======================================" \
  "STEP 2: Filter & Export to SFT JSONL" \
  "======================================" \
  ""

filter_args=(
  --config examples/qwen_vl/configs/filter_vision_sft.toml
  --input-db "$OUTPUT_DIR/rollouts.db"
  --output-dir "$sft_dir"
)
uvx synth-ai filter "${filter_args[@]}"

printf '%s\n' "" "✅ Filtering complete!" ""

# Show dataset stats: pretty-print the stats file when it exists, otherwise
# fall back to counting lines in the exported JSONL splits.
printf '%s\n' \
  "======================================" \
  "Dataset Statistics" \
  "======================================" \
  ""

if [ ! -f "$stats_file" ]; then
  printf 'Train samples: %s\n' "$(wc -l < "$sft_dir/train.jsonl")"
  printf 'Val samples: %s\n' "$(wc -l < "$sft_dir/val.jsonl")"
else
  cat "$stats_file" | python3 -m json.tool
fi

printf '\n'
|
|
115
|
+
|
|
116
|
+
# Step 3: Train SFT (optional - user can run this separately)
# Prints the exact training command, then asks whether to run it right away.
echo "======================================"
echo "STEP 3: Train Vision SFT (Optional)"
echo "======================================"
echo ""
echo "To train the model, run:"
echo ""
echo " cd $MONOREPO_DIR"
echo " uvx synth-ai train \\"
echo " --type sft \\"
echo " --config configs/vision_sft/crafter_qwen3vl_8b_gpt5nano.toml \\"
echo " --dataset $SYNTH_DIR/$OUTPUT_DIR/sft/train.jsonl \\"
echo " --eval-dataset $SYNTH_DIR/$OUTPUT_DIR/sft/val.jsonl \\"
echo " --env-file backend/.env.dev"
echo ""

# `read` returns non-zero on EOF or a closed stdin (CI, `< /dev/null`, piped
# runs). With `set -e` active that would abort the script right here, after
# all the expensive work has already succeeded. Treat an unreadable answer as
# the default "no" instead.
REPLY=""
read -p "Run training now? (y/N) " -n 1 -r || REPLY=""
echo
if [[ $REPLY =~ ^[Yy]$ ]]; then
  echo ""
  echo "Starting SFT training..."
  echo ""

  if [ ! -d "$MONOREPO_DIR" ]; then
    echo "Error: MONOREPO_DIR not found. Set MONOREPO_DIR to your monorepo path before running training." >&2
    exit 1
  fi

  # The training config and env file are relative to the monorepo root; the
  # datasets are addressed through SYNTH_DIR because they live in that checkout.
  cd "$MONOREPO_DIR"

  uvx synth-ai train \
    --type sft \
    --config configs/vision_sft/crafter_qwen3vl_8b_gpt5nano.toml \
    --dataset "$SYNTH_DIR/$OUTPUT_DIR/sft/train.jsonl" \
    --eval-dataset "$SYNTH_DIR/$OUTPUT_DIR/sft/val.jsonl" \
    --env-file backend/.env.dev

  echo ""
  echo "✅ Training complete!"
else
  echo ""
  echo "Skipping training. You can run it later using the command above."
fi
|
|
159
|
+
|
|
160
|
+
# Final summary: where the artifacts were written and what to do next.
# Unquoted heredoc delimiter so $OUTPUT_DIR is expanded in the listing.
cat <<EOF

======================================
Pipeline Complete!
======================================

📂 Outputs:
 - Raw traces: $OUTPUT_DIR/rollouts.db
 - SFT train: $OUTPUT_DIR/sft/train.jsonl
 - SFT val: $OUTPUT_DIR/sft/val.jsonl
 - Stats: $OUTPUT_DIR/sft/filter_stats.json

🚀 Next steps:
 1. Train SFT model (see command above)
 2. Evaluate trained model
 3. Fine-tune with RL

EOF
|