synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic.
- examples/README.md +1 -0
- examples/analyze_semantic_words.sh +2 -2
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
- examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -2
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +6 -6
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +79 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +6 -6
- examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
- examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
- examples/sft/evaluate.py +2 -4
- examples/sft/export_dataset.py +7 -4
- examples/swe/task_app/README.md +33 -3
- examples/swe/task_app/grpo_swe_mini.py +4 -1
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
- examples/swe/task_app/hosted/inference/openai_client.py +4 -4
- examples/swe/task_app/hosted/policy_routes.py +0 -2
- examples/swe/task_app/hosted/rollout.py +0 -8
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +36 -5
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/README.md +3 -3
- examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
- examples/vlm/crafter_openai_vlm_agent.py +3 -5
- examples/vlm/filter_image_rows.py +1 -1
- examples/vlm/run_crafter_vlm_benchmark.py +2 -2
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +1 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
- examples/warming_up_to_rl/export_trace_sft.py +174 -60
- examples/warming_up_to_rl/readme.md +63 -132
- examples/warming_up_to_rl/run_fft_and_save.py +1 -1
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/run_rl_and_save.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
- synth_ai/__init__.py +44 -30
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +144 -7
- synth_ai/api/train/__init__.py +13 -1
- synth_ai/api/train/builders.py +9 -3
- synth_ai/api/train/cli.py +155 -17
- synth_ai/api/train/config_finder.py +18 -11
- synth_ai/api/train/configs/__init__.py +8 -1
- synth_ai/api/train/configs/rl.py +32 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/api/train/env_resolver.py +13 -10
- synth_ai/auth/credentials.py +119 -0
- synth_ai/cli/__init__.py +61 -69
- synth_ai/cli/_modal_wrapper.py +7 -5
- synth_ai/cli/_typer_patch.py +0 -2
- synth_ai/cli/_validate_task_app.py +22 -4
- synth_ai/cli/commands/__init__.py +17 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/deploy/__init__.py +23 -0
- synth_ai/cli/commands/deploy/core.py +614 -0
- synth_ai/cli/commands/deploy/errors.py +72 -0
- synth_ai/cli/commands/deploy/validation.py +11 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1109 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +388 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +73 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +199 -0
- synth_ai/cli/commands/train/judge_validation.py +304 -0
- synth_ai/cli/commands/train/validation.py +443 -0
- synth_ai/cli/demo.py +2 -162
- synth_ai/cli/deploy/__init__.py +28 -0
- synth_ai/cli/deploy/core.py +5 -0
- synth_ai/cli/deploy/errors.py +23 -0
- synth_ai/cli/deploy/validation.py +5 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +3 -1
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/recent.py +2 -1
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +21 -0
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +110 -1499
- synth_ai/cli/traces.py +1 -1
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +1 -1
- synth_ai/cli/watch.py +1 -1
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +702 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +0 -1
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/evals/base.py +16 -5
- synth_ai/evals/client.py +1 -1
- synth_ai/http.py +8 -22
- synth_ai/inference/client.py +1 -1
- synth_ai/judge_schemas.py +4 -5
- synth_ai/learning/client.py +1 -1
- synth_ai/learning/health.py +1 -1
- synth_ai/learning/jobs.py +1 -1
- synth_ai/learning/rl/client.py +4 -2
- synth_ai/learning/rl/env_keys.py +1 -1
- synth_ai/learning/rl/secrets.py +1 -1
- synth_ai/learning/sft/client.py +1 -1
- synth_ai/learning/sft/data.py +407 -4
- synth_ai/learning/validators.py +4 -1
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +469 -0
- synth_ai/streaming/streamer.py +301 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/apps/__init__.py +4 -2
- synth_ai/task/config.py +6 -4
- synth_ai/task/rubrics/__init__.py +1 -2
- synth_ai/task/rubrics/loaders.py +14 -10
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/trace_correlation_helpers.py +24 -11
- synth_ai/task/tracing_utils.py +14 -3
- synth_ai/task/validators.py +0 -1
- synth_ai/tracing_v3/abstractions.py +3 -3
- synth_ai/tracing_v3/config.py +15 -13
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +3 -1
- synth_ai/tracing_v3/decorators.py +10 -7
- synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/session_tracer.py +7 -7
- synth_ai/tracing_v3/storage/base.py +29 -29
- synth_ai/tracing_v3/storage/config.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +8 -9
- synth_ai/tracing_v3/turso/native_manager.py +80 -72
- synth_ai/tracing_v3/utils.py +2 -2
- synth_ai/utils/__init__.py +101 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/cli.py +131 -0
- synth_ai/utils/env.py +294 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/modal.py +308 -0
- synth_ai/utils/process.py +212 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/v0/config/__init__.py +1 -5
- synth_ai/v0/config/base_url.py +1 -7
- synth_ai/v0/tracing/config.py +1 -1
- synth_ai/v0/tracing/decorators.py +1 -1
- synth_ai/v0/tracing/upload.py +1 -1
- synth_ai/v0/tracing_v1/config.py +1 -1
- synth_ai/v0/tracing_v1/decorators.py +1 -1
- synth_ai/v0/tracing_v1/upload.py +1 -1
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
- synth_ai/cli/man.py +0 -106
- synth_ai/cli/tui.py +0 -57
- synth_ai/compound/cais.py +0 -0
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/handshake.py +0 -109
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -906
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
examples/qwen_vl/collect_vision_traces.py
@@ -0,0 +1,368 @@
#!/usr/bin/env python3
"""
Collect Crafter vision traces for SFT dataset creation.

Supports both:
1. OpenAI models (gpt-5-nano, gpt-4o-mini) via OpenAI API
2. Qwen-VL models via synth-ai hosted inference

Traces are stored in SQLite with full multimodal messages (text + base64 images)
ready for export to SFT JSONL format.

Requirements:
- For OpenAI: OPENAI_API_KEY environment variable
- For synth-ai: SYNTH_API_KEY environment variable

Usage:
    # Collect with gpt-5-nano
    uv run python examples/qwen_vl/collect_vision_traces.py \
        --model gpt-5-nano \
        --provider openai \
        --episodes 100 \
        --max-steps 50 \
        --output-dir traces/gpt5nano_vision

    # Collect with Qwen3-VL via synth
    uv run python examples/qwen_vl/collect_vision_traces.py \
        --model Qwen/Qwen3-VL-8B-Instruct \
        --provider synth \
        --episodes 100 \
        --max-steps 50 \
        --output-dir traces/qwen3vl_vision
"""

from __future__ import annotations

import argparse
import asyncio
import json
import logging
import os
from pathlib import Path
from typing import Any, cast
from uuid import uuid4

from examples.task_apps.crafter.task_app.synth_envs_hosted.envs.crafter.environment import (
    CrafterEnvironmentWrapper,
)
from examples.task_apps.crafter.task_app.synth_envs_hosted.envs.crafter.policy import CrafterPolicy
from synth_ai.environments.examples.crafter_classic.environment import CrafterClassicEnvironment
from synth_ai.environments.examples.crafter_classic.taskset import (
    CrafterTaskInstance,
    CrafterTaskInstanceMetadata,
)
from synth_ai.environments.tasks.core import Impetus, Intent

# Try importing trace storage
try:
    from synth_ai.tracing_v3.storage import create_storage
    from synth_ai.tracing_v3.storage.config import StorageBackend, StorageConfig
    TRACING_AVAILABLE = True
except ImportError:
    print("Warning: Tracing storage not available. Traces will not be persisted.")
    TRACING_AVAILABLE = False


def _get_openai_client():
    """Get OpenAI client."""
    from openai import OpenAI

    api_key = os.getenv("OPENAI_API_KEY")
    if not api_key:
        raise RuntimeError("OPENAI_API_KEY not set")
    return OpenAI(api_key=api_key)


def _default_backend_base_url() -> str:
    raw = os.getenv("BACKEND_BASE_URL", "https://agent-learning.onrender.com/api").strip()
    return raw if raw.endswith("/api") else f"{raw}/api"


def _get_synth_client():
    """Get synth-ai inference client."""
    from synth_ai.inference.client import InferenceClient

    api_key = os.getenv("SYNTH_API_KEY")
    if not api_key:
        raise RuntimeError("SYNTH_API_KEY not set")
    base_url = os.getenv("SYNTH_BASE_URL", _default_backend_base_url())
    return InferenceClient(base_url=base_url, api_key=api_key)


def _build_task_instance(seed: int) -> CrafterTaskInstance:
    """Create Crafter task instance."""
    impetus = Impetus(instructions="Explore, survive, and unlock achievements.")
    intent = Intent(
        rubric={"goal": "Maximise Crafter achievements."},
        gold_trajectories=None,
        gold_state_diff={},
    )
    metadata = CrafterTaskInstanceMetadata(
        difficulty="custom",
        seed=seed,
        num_trees_radius=0,
        num_cows_radius=0,
        num_hostiles_radius=0,
    )
    instance = CrafterTaskInstance(
        id=uuid4(),
        impetus=impetus,
        intent=intent,
        metadata=metadata,
        is_reproducible=True,
        initial_engine_snapshot=None,
    )
    setattr(instance, "config", {"seed": seed, "length": 256, "area": [64, 64]})
    return instance


def _normalise_openai_request(payload: dict[str, Any], model: str, temperature: float) -> dict[str, Any]:
    """Normalize inference request for OpenAI API."""
    request = dict(payload)
    request["model"] = model

    # Remove vendor-specific knobs
    request.pop("stop_after_tool_calls", None)
    request.pop("thinking_mode", None)
    request.pop("thinking_budget", None)

    # gpt-5 models have specific requirements
    if "gpt-5" in model.lower():
        # gpt-5-nano only supports temperature=1 (default)
        request.pop("temperature", None)  # Remove custom temperature
        request.setdefault("max_completion_tokens", 512)
        request.pop("max_tokens", None)  # Remove if present
    else:
        # Older models use max_tokens and support custom temperature
        request.setdefault("temperature", temperature)
        max_completion = request.pop("max_completion_tokens", None)
        if max_completion is not None:
            request["max_tokens"] = max_completion
        else:
            request.setdefault("max_tokens", 512)

    return request


async def collect_traces(
    model: str,
    provider: str,
    num_episodes: int,
    max_steps: int,
    seed_start: int,
    output_dir: Path,
    temperature: float,
):
    """Collect vision traces for SFT."""
    # Setup tracing store
    if not TRACING_AVAILABLE:
        raise RuntimeError("Tracing storage not available. Cannot persist traces.")

    output_dir.mkdir(parents=True, exist_ok=True)
    db_path = output_dir / "rollouts.db"
    storage_config = StorageConfig(
        backend=StorageBackend.SQLITE,
        connection_string=f"sqlite+aiosqlite:///{db_path}",
    )
    tracing_store = create_storage(storage_config)
    await tracing_store.initialize()

    # Setup inference client
    if provider == "openai":
        client = _get_openai_client()
        inference_url = "openai://chat-completions"
    elif provider == "synth":
        client = _get_synth_client()
        inference_url = "synth://inference"
    else:
        raise ValueError(f"Unknown provider: {provider}")

    print(f"🎮 Collecting {num_episodes} episodes with {model}")
    print(f"  Provider: {provider}")
    print(f"  Max steps: {max_steps}")
    print(f"  Output: {output_dir}")
    print(f"  Database: {db_path}")
    print()

    total_steps = 0
    total_achievements = 0

    for episode_id in range(num_episodes):
        seed = seed_start + episode_id

        # Build task instance
        task_instance = _build_task_instance(seed)
        env = CrafterClassicEnvironment(task_instance)
        wrapper = CrafterEnvironmentWrapper(env, seed=seed)

        # Initialize policy (vision auto-detected from model name)
        policy = CrafterPolicy(inference_url=inference_url, model=model)
        await policy.initialize({
            "use_tools": True,
            "model": model,
            "temperature": temperature,
            "max_tokens": 512,
        })

        observation_packet = await wrapper.initialize()

        steps_taken = 0
        achievements = set()

        # Run episode
        for step_idx in range(max_steps):
            obs_dict = observation_packet.get("observation")
            if not isinstance(obs_dict, dict):
                break

            # Format observation
            obs_text = policy._format_observation_for_llm(observation_packet)  # noqa: SLF001

            # Get tool calls from policy
            tool_calls, meta = await policy.step(
                observation_text=obs_text,
                metadata={"raw_observation": observation_packet},
            )
            if "inference_request" not in meta:
                break

            inference_request = meta["inference_request"]

            # Call inference
            if provider == "openai":
                normalized_request = _normalise_openai_request(
                    inference_request,
                    model=model,
                    temperature=temperature,
                )
                response = client.chat.completions.create(**normalized_request)
                response_dict = response.model_dump()
            else:  # synth
                response_dict = await client.create_chat_completion(
                    model=model,
                    messages=inference_request["messages"],
                    temperature=temperature,
                    max_tokens=512,
                    tools=inference_request.get("tools"),
                )

            # Parse tool calls
            assistant_tool_calls = CrafterPolicy.parse_response_to_tool_calls(
                response_dict,
                use_tools=policy.use_tools,
            )
            if not assistant_tool_calls:
                break

            # Store trace
            assistant_message = response_dict["choices"][0].get("message", {})
            trace_messages = inference_request["messages"] + [assistant_message]

            tracing_store_any = cast(Any, tracing_store)
            if hasattr(tracing_store_any, "store_trace"):
                await tracing_store_any.store_trace(
                    session_id=f"ep{episode_id:04d}",
                    step=step_idx,
                    messages=trace_messages,
                    model=model,
                    metadata={
                        "seed": seed,
                        "has_image": policy.use_vision,
                        "provider": provider,
                    },
                )
            else:
                logging.warning(
                    "Tracing backend does not expose store_trace(); skipping persistence for episode %s",
                    episode_id,
                )

            # Execute action
            assistant_text = assistant_message.get("content")
            env_response = await wrapper.step(assistant_tool_calls)
            if not isinstance(env_response, dict):
                break

            # Update policy history
            policy._append_assistant_turn(  # noqa: SLF001
                assistant_text,
                assistant_tool_calls,
                env_response,
            )

            steps_taken += 1

            # Track achievements
            obs = env_response.get("observation", {})
            ach_status = obs.get("achievements_status", {})
            for name, unlocked in ach_status.items():
                if unlocked:
                    achievements.add(name)

            if env_response.get("done"):
                break
            observation_packet = env_response

        await wrapper.terminate()

        total_steps += steps_taken
        total_achievements += len(achievements)

        print(
            f"✓ Episode {episode_id:3d} (seed={seed}): {steps_taken} steps, "
            f"{len(achievements)} achievements"
        )

    print()
    print(f"✅ Collection complete!")
    print(f"  Total episodes: {num_episodes}")
    print(f"  Total steps: {total_steps}")
    print(f"  Avg achievements: {total_achievements / num_episodes:.2f}")
    print(f"  Database: {db_path}")
    print()
    print("Next steps:")
    print("  1. Export traces to SFT JSONL format")
    print("  2. Split into train/val datasets")
    print("  3. Train VLM with LoRA")

    return db_path


async def main() -> None:
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--model",
        required=True,
        help="Model name (e.g., gpt-5-nano, Qwen/Qwen3-VL-8B-Instruct)",
    )
    parser.add_argument(
        "--provider",
        choices=["openai", "synth"],
        required=True,
        help="Inference provider",
    )
    parser.add_argument("--episodes", type=int, default=100, help="Number of episodes")
    parser.add_argument("--max-steps", type=int, default=50, help="Max steps per episode")
    parser.add_argument("--seed-start", type=int, default=0, help="Starting seed")
    parser.add_argument("--temperature", type=float, default=0.7, help="Sampling temperature")
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=Path("traces/vision_traces"),
        help="Output directory for traces",
    )
    args = parser.parse_args()

    await collect_traces(
        model=args.model,
        provider=args.provider,
        num_episodes=args.episodes,
        max_steps=args.max_steps,
        seed_start=args.seed_start,
        output_dir=args.output_dir,
        temperature=args.temperature,
    )


if __name__ == "__main__":
    asyncio.run(main())
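The script's closing "Next steps" point at exporting the collected SQLite traces to SFT JSONL. Below is a minimal sketch of that export; the table and column names are assumptions for illustration, not the actual tracing_v3 schema, so the query must be adapted to the real rollouts.db layout.

# Illustrative export sketch. The "traces" table and its "session_id", "step",
# and "messages" columns are assumed; inspect rollouts.db for the real schema.
import json
import sqlite3
from pathlib import Path


def export_sft_jsonl(db_path: Path, out_path: Path) -> int:
    conn = sqlite3.connect(db_path)
    rows = conn.execute(
        "SELECT messages FROM traces ORDER BY session_id, step"
    ).fetchall()
    written = 0
    with out_path.open("w", encoding="utf-8") as fh:
        for (messages_json,) in rows:
            # Each stored model call becomes one SFT record carrying the full
            # multimodal message list (text plus base64 image parts).
            fh.write(json.dumps({"messages": json.loads(messages_json)}) + "\n")
            written += 1
    conn.close()
    return written


if __name__ == "__main__":
    count = export_sft_jsonl(
        Path("traces/gpt5nano_vision/rollouts.db"),
        Path("traces/gpt5nano_vision/train.jsonl"),
    )
    print(f"wrote {count} SFT records")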
examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml
@@ -0,0 +1,110 @@
[algorithm]
type = "online"
method = "policy_gradient"
variety = "gspo"

[services]
task_url = "https://YOUR-MODAL-TASK-APP.modal.run"

[compute]
gpu_type = "H200"
gpu_count = 2

[topology]
type = "single_node_split"
gpus_for_vllm = 1
gpus_for_training = 1
gpus_for_ref = 0
tensor_parallel = 1

[vllm]
tensor_parallel_size = 1
max_model_len = 4096

[reference]
placement = "none"

[model]
base = "Qwen/Qwen3-VL-4B-Instruct"
trainer_mode = "lora"
label = "crafter-rl-vision-qwen3vl4b"
supports_vision = true

[lora]
r = 16
alpha = 32
dropout = 0.05
target_modules = [ "all-linear",]

[rollout]
env_name = "crafter"
max_turns = 10
episodes_per_batch = 2
policy_name = "crafter-react"
max_concurrent_rollouts = 4
batches_per_step = 2
ops = [ "agent", "env",]

[evaluation]
instances = 8
every_n_iters = 5
seeds = [ 0, 1, 2, 3, 4, 5, 6, 7,]

[training]
num_epochs = 1
iterations_per_epoch = 3
gradient_accumulation_steps = 2
max_accumulated_minibatch = 1
max_turns = 10
batch_size = 2
group_size = 2
learning_rate = 5e-5
log_interval = 1
weight_sync_interval = 1
event_rewards_kind = "unique"
async_semaphore_max = 2
step_rewards_enabled = true
step_rewards_mode = "decision_stepwise"
step_rewards_indicator_lambda = 1.0
step_rewards_beta = 0.0
step_rewards_strategy = "consistent"
max_images_per_message = 1
supports_vision = true

[tags]
experiment = "crafter_rl_vision_qwen3vl4b"
task = "crafter_agent_vision"
model_size = "4b"
vision_enabled = true
image_only = true

[vllm.limit_mm_per_prompt]
image = 1

[rollout.env_config]
difficulty = "easy"

[rollout.policy_config]
use_vision = true
image_only_mode = true
temperature = 0.6
top_p = 0.95
max_tokens = 512
max_llm_calls = 10

[training.weight_sync]
enable = true
targets = [ "policy",]
mode = "direct"
direct = true
verify_every_k = 0

[judge.options]
timeout_s = 30

[rollout.env_config.step_rewards]
enabled = true
mode = "decision_stepwise"
strategy = "consistent"
indicator_lambda = 1.0
step_beta = 0.0
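One easy mistake with the split topology above is letting the per-role GPU counts drift out of sync with [compute].gpu_count. A small pre-flight check sketch (tomllib is stdlib on Python 3.11+; the path assumes the repo layout shown in the file list):

# Sanity-check that the declared GPU split matches the total GPU allocation.
import tomllib

with open("examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml", "rb") as fh:
    cfg = tomllib.load(fh)

topology = cfg["topology"]
declared = (
    topology["gpus_for_vllm"]
    + topology["gpus_for_training"]
    + topology["gpus_for_ref"]
)
total = cfg["compute"]["gpu_count"]
assert declared == total, f"topology uses {declared} GPUs but compute requests {total}"
print("topology OK:", topology)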
examples/qwen_vl/configs/crafter_vlm_sft_example.toml
@@ -0,0 +1,59 @@
# Example Vision SFT Config for Crafter
# Train Qwen-VL on collected vision traces

[algorithm]
type = "offline"
method = "sft"
variety = "lora"

[job]
model = "Qwen/Qwen3-VL-8B-Instruct"  # or Qwen/Qwen3-VL-4B-Instruct
# Dataset from collect_vision_traces.py → export_to_sft.py
data = "traces/gpt5nano_vision/train.jsonl"

[compute]
gpu_type = "H200"
gpu_count = 2  # 2x H200 (282GB total)
nodes = 1

[training]
mode = "lora"  # SFT with LoRA
use_qlora = true  # Quantized LoRA for memory efficiency

[hyperparameters]
n_epochs = 2  # 2 epochs over collected samples
per_device_batch = 1  # Batch size 1 (images are memory-intensive)
gradient_accumulation_steps = 32
sequence_length = 2048  # Shorter context (images dominate memory)
learning_rate = 5e-06
warmup_ratio = 0.03
train_kind = "peft"

# LoRA config
lora_rank = 16
lora_alpha = 32
lora_dropout = 0.05
lora_target_modules = ["all-linear"]  # Full linear layer adaptation

# Training optimizations
[hyperparameters.parallelism]
use_deepspeed = true
deepspeed_stage = 2
fsdp = false
bf16 = true
fp16 = false
activation_checkpointing = true

# Evaluation
evaluation_strategy = "steps"
eval_steps = 100
save_best_model_at_end = true
metric_for_best_model = "val.loss"
greater_is_better = false
load_best_model_at_end = true

[tags]
task = "crafter"
modality = "vision"
data_source = "collected_traces"
model_family = "qwen_vl"
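The hyperparameters above translate into a fairly small effective batch; a quick back-of-the-envelope sketch, assuming both H200s act as data-parallel ranks under DeepSpeed ZeRO-2:

# Effective global batch = per-device batch x gradient accumulation x data-parallel ranks.
per_device_batch = 1             # images make larger per-device batches memory-heavy
gradient_accumulation_steps = 32
data_parallel_ranks = 2          # assumption: one rank per H200 under ZeRO stage 2

effective_batch = per_device_batch * gradient_accumulation_steps * data_parallel_ranks
print(effective_batch)  # 64 samples per optimizer step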
examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml
@@ -0,0 +1,26 @@
# Evaluation config for gpt-4o-mini with vision
# Higher-quality teacher for Crafter SFT distillation

[eval]
app_id = "grpo-crafter-task-app"
task_app_url = "https://synth-laboratories--grpo-crafter-task-app.modal.run"
model = "gpt-4o-mini-2024-07-18"
seeds = "200-299"
max_turns = 50
concurrency = 5
env_name = "crafter"
policy_name = "crafter-react"
trace_format = "structured"
return_trace = true

[eval.env_config]
env_params = {max_steps_per_episode = 50}

[eval.policy_config]
provider = "openai"
model = "gpt-4o-mini-2024-07-18"
temperature = 0.6
max_tokens = 512
use_vision = true
image_only_mode = false
use_tools = true
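This config gives seeds as a range string ("200-299"), while the next config lists seeds explicitly; a sketch of the assumed equivalence between the two forms (how the eval CLI actually parses the string is not shown in this diff):

# Assumed expansion of a "start-end" seed string into the explicit list form
# used by other eval configs here; both endpoints are treated as inclusive.
def expand_seed_range(spec: str) -> list[int]:
    start, end = (int(part) for part in spec.split("-"))
    return list(range(start, end + 1))


assert expand_seed_range("200-299") == list(range(200, 300))  # 100 episodes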
examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml
@@ -0,0 +1,29 @@
# Proper synth-ai eval config for Crafter with gpt-4o-mini vision
# Collects traces with images to database for synth-ai filter

[eval]
app_id = "grpo-crafter-task-app"  # Modal deployed task app
model = "gpt-4o-mini-2024-07-18"
seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]  # 10 episodes for test
max_turns = 50  # 50 steps per episode
concurrency = 2  # 2 parallel episodes
env_name = "crafter"
policy_name = "crafter-react"
trace_format = "structured"  # Required for synth-ai eval
return_trace = true

[eval.env_config]
env_params = {max_steps_per_episode = 50}

[eval.policy_config]
provider = "openai"
model = "gpt-4o-mini-2024-07-18"
inference_url = "https://api.openai.com"  # Base URL
# Note: Don't set temperature for gpt-4o-mini, use default
top_p = 0.95
max_tokens = 512
use_vision = true  # Enable vision
image_only_mode = false  # Use both text + images
max_llm_calls = 50
use_tools = true  # Enable tool calling
examples/qwen_vl/configs/eval_gpt5nano_vision.toml
@@ -0,0 +1,26 @@
# Evaluation config for gpt-4o-mini (vision)
# Collects traces for SFT training; legacy gpt-5-nano naming kept for convenience

[eval]
app_id = "grpo-crafter-task-app"
task_app_url = "https://synth-laboratories--grpo-crafter-task-app.modal.run"
model = "gpt-4o-mini-2024-07-18"
seeds = "0-99"
max_turns = 50
concurrency = 5
env_name = "crafter"
policy_name = "crafter-react"
trace_format = "structured"
return_trace = true

[eval.env_config]
env_params = {max_steps_per_episode = 50}

[eval.policy_config]
provider = "openai"
model = "gpt-4o-mini-2024-07-18"
temperature = 0.7
max_tokens = 512
use_vision = true
image_only_mode = false
use_tools = true
examples/qwen_vl/configs/eval_qwen3vl_vision.toml
@@ -0,0 +1,26 @@
# Evaluation config for Qwen3-VL vision rollouts
# Collects traces for SFT training via synth-ai hosted inference

[eval]
app_id = "grpo-crafter-task-app"
task_app_url = "https://synth-laboratories--grpo-crafter-task-app.modal.run"
model = "Qwen/Qwen3-VL-8B-Instruct"
seeds = "100-199"
max_turns = 50
concurrency = 5
env_name = "crafter"
policy_name = "crafter-react"
trace_format = "structured"
return_trace = true

[eval.env_config]
env_params = {max_steps_per_episode = 50}

[eval.policy_config]
provider = "synth"
model = "Qwen/Qwen3-VL-8B-Instruct"
temperature = 0.7
max_tokens = 512
use_vision = true
image_only_mode = false
use_tools = true
examples/qwen_vl/configs/filter_qwen3vl_sft.toml
@@ -0,0 +1,49 @@
# Filter Qwen3-VL vision traces for SFT training
# Mirrors the GPT-4o mini filter configuration for vision data

[filter]
input_db = "traces/qwen3vl_vision/rollouts.db"
output_dir = "traces/qwen3vl_vision/sft"

# Quality filters
min_steps_per_episode = 5
min_achievements_per_episode = 0
max_steps_per_episode = 50

# Behavioral filters
detect_loops = true
max_repeated_actions = 5
min_unique_states = 3

# Remove episodes with errors
filter_errors = true
filter_timeouts = true

# Export format
export_format = "sft_jsonl"
include_images = true
include_metadata = true

# SFT-specific processing
[sft]
max_sequence_length = 2048
deduplicate = true
shuffle = true
require_valid_tool_calls = true
filter_empty_responses = true

# Train/val split
[split]
enabled = true
val_fraction = 0.1
random_seed = 42
stratify_by = "achievements"

train_file = "train.jsonl"
val_file = "val.jsonl"

# Statistics
[output]
save_stats = true
stats_file = "filter_stats.json"
save_filtered_episode_ids = true
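A minimal sketch of what the [split] table above describes: a seeded 90/10 split stratified by achievement count. The episode record shape and the "achievements" key are illustrative assumptions, not the filter tool's actual internals.

# Seeded train/val split, stratified by per-episode achievement count.
import random
from collections import defaultdict


def split_episodes(episodes, val_fraction=0.1, seed=42):
    rng = random.Random(seed)
    buckets = defaultdict(list)
    for episode in episodes:
        buckets[episode["achievements"]].append(episode)  # stratify_by = "achievements"
    train, val = [], []
    for bucket in buckets.values():
        rng.shuffle(bucket)
        n_val = int(len(bucket) * val_fraction) if len(bucket) > 1 else 0
        val.extend(bucket[:n_val])
        train.extend(bucket[n_val:])
    return train, val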