synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic.
- examples/README.md +1 -0
- examples/analyze_semantic_words.sh +2 -2
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
- examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -2
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +6 -6
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +79 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +6 -6
- examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
- examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
- examples/sft/evaluate.py +2 -4
- examples/sft/export_dataset.py +7 -4
- examples/swe/task_app/README.md +33 -3
- examples/swe/task_app/grpo_swe_mini.py +4 -1
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
- examples/swe/task_app/hosted/inference/openai_client.py +4 -4
- examples/swe/task_app/hosted/policy_routes.py +0 -2
- examples/swe/task_app/hosted/rollout.py +0 -8
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +36 -5
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/README.md +3 -3
- examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
- examples/vlm/crafter_openai_vlm_agent.py +3 -5
- examples/vlm/filter_image_rows.py +1 -1
- examples/vlm/run_crafter_vlm_benchmark.py +2 -2
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +1 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
- examples/warming_up_to_rl/export_trace_sft.py +174 -60
- examples/warming_up_to_rl/readme.md +63 -132
- examples/warming_up_to_rl/run_fft_and_save.py +1 -1
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/run_rl_and_save.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
- synth_ai/__init__.py +44 -30
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +144 -7
- synth_ai/api/train/__init__.py +13 -1
- synth_ai/api/train/builders.py +9 -3
- synth_ai/api/train/cli.py +155 -17
- synth_ai/api/train/config_finder.py +18 -11
- synth_ai/api/train/configs/__init__.py +8 -1
- synth_ai/api/train/configs/rl.py +32 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/api/train/env_resolver.py +13 -10
- synth_ai/auth/credentials.py +119 -0
- synth_ai/cli/__init__.py +61 -69
- synth_ai/cli/_modal_wrapper.py +7 -5
- synth_ai/cli/_typer_patch.py +0 -2
- synth_ai/cli/_validate_task_app.py +22 -4
- synth_ai/cli/commands/__init__.py +17 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/deploy/__init__.py +23 -0
- synth_ai/cli/commands/deploy/core.py +614 -0
- synth_ai/cli/commands/deploy/errors.py +72 -0
- synth_ai/cli/commands/deploy/validation.py +11 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1109 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +388 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +73 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +199 -0
- synth_ai/cli/commands/train/judge_validation.py +304 -0
- synth_ai/cli/commands/train/validation.py +443 -0
- synth_ai/cli/demo.py +2 -162
- synth_ai/cli/deploy/__init__.py +28 -0
- synth_ai/cli/deploy/core.py +5 -0
- synth_ai/cli/deploy/errors.py +23 -0
- synth_ai/cli/deploy/validation.py +5 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +3 -1
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/recent.py +2 -1
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +21 -0
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +110 -1499
- synth_ai/cli/traces.py +1 -1
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +1 -1
- synth_ai/cli/watch.py +1 -1
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +702 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +0 -1
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/evals/base.py +16 -5
- synth_ai/evals/client.py +1 -1
- synth_ai/http.py +8 -22
- synth_ai/inference/client.py +1 -1
- synth_ai/judge_schemas.py +4 -5
- synth_ai/learning/client.py +1 -1
- synth_ai/learning/health.py +1 -1
- synth_ai/learning/jobs.py +1 -1
- synth_ai/learning/rl/client.py +4 -2
- synth_ai/learning/rl/env_keys.py +1 -1
- synth_ai/learning/rl/secrets.py +1 -1
- synth_ai/learning/sft/client.py +1 -1
- synth_ai/learning/sft/data.py +407 -4
- synth_ai/learning/validators.py +4 -1
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +469 -0
- synth_ai/streaming/streamer.py +301 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/apps/__init__.py +4 -2
- synth_ai/task/config.py +6 -4
- synth_ai/task/rubrics/__init__.py +1 -2
- synth_ai/task/rubrics/loaders.py +14 -10
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/trace_correlation_helpers.py +24 -11
- synth_ai/task/tracing_utils.py +14 -3
- synth_ai/task/validators.py +0 -1
- synth_ai/tracing_v3/abstractions.py +3 -3
- synth_ai/tracing_v3/config.py +15 -13
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +3 -1
- synth_ai/tracing_v3/decorators.py +10 -7
- synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/session_tracer.py +7 -7
- synth_ai/tracing_v3/storage/base.py +29 -29
- synth_ai/tracing_v3/storage/config.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +8 -9
- synth_ai/tracing_v3/turso/native_manager.py +80 -72
- synth_ai/tracing_v3/utils.py +2 -2
- synth_ai/utils/__init__.py +101 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/cli.py +131 -0
- synth_ai/utils/env.py +294 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/modal.py +308 -0
- synth_ai/utils/process.py +212 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/v0/config/__init__.py +1 -5
- synth_ai/v0/config/base_url.py +1 -7
- synth_ai/v0/tracing/config.py +1 -1
- synth_ai/v0/tracing/decorators.py +1 -1
- synth_ai/v0/tracing/upload.py +1 -1
- synth_ai/v0/tracing_v1/config.py +1 -1
- synth_ai/v0/tracing_v1/decorators.py +1 -1
- synth_ai/v0/tracing_v1/upload.py +1 -1
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
- synth_ai/cli/man.py +0 -106
- synth_ai/cli/tui.py +0 -57
- synth_ai/compound/cais.py +0 -0
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/handshake.py +0 -109
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -906
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
examples/qwen_vl/test_image_validation.py
ADDED
@@ -0,0 +1,201 @@
+#!/usr/bin/env python3
+"""
+Quick test script to demonstrate image validation.
+
+Run from synth-ai root:
+    uv run python examples/qwen_vl/test_image_validation.py
+"""
+
+from synth_ai.learning.sft.data import coerce_example, validate_vision_example
+
+# Test cases
+test_cases = [
+    {
+        "name": "Valid - HTTP URL",
+        "data": {
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "Describe this"},
+                        {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}},
+                    ],
+                },
+                {"role": "assistant", "content": "A beautiful image"},
+            ]
+        },
+        "should_pass": True,
+    },
+    {
+        "name": "Valid - Base64",
+        "data": {
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBORw0KGgo..."}},
+                    ],
+                },
+                {"role": "assistant", "content": "An image"},
+            ]
+        },
+        "should_pass": True,
+    },
+    {
+        "name": "Invalid - Empty URL",
+        "data": {
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "What's this?"},
+                        {"type": "image_url", "image_url": {"url": ""}},  # Empty!
+                    ],
+                },
+                {"role": "assistant", "content": "Response"},
+            ]
+        },
+        "should_pass": False,
+    },
+    {
+        "name": "Invalid - Missing URL field",
+        "data": {
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "image_url", "image_url": {}},  # No url field!
+                    ],
+                },
+                {"role": "assistant", "content": "Response"},
+            ]
+        },
+        "should_pass": False,
+    },
+    {
+        "name": "Invalid - Null URL",
+        "data": {
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "image_url", "image_url": {"url": None}},  # Null!
+                    ],
+                },
+                {"role": "assistant", "content": "Response"},
+            ]
+        },
+        "should_pass": False,
+    },
+    {
+        "name": "Invalid - Whitespace URL",
+        "data": {
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "image_url", "image_url": {"url": " "}},  # Whitespace!
+                    ],
+                },
+                {"role": "assistant", "content": "Response"},
+            ]
+        },
+        "should_pass": False,
+    },
+    {
+        "name": "Invalid - Mixed valid and invalid",
+        "data": {
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "image_url", "image_url": {"url": "https://example.com/valid.jpg"}},
+                        {"type": "image_url", "image_url": {"url": ""}},  # One invalid!
+                    ],
+                },
+                {"role": "assistant", "content": "Response"},
+            ]
+        },
+        "should_pass": False,
+    },
+    {
+        "name": "Invalid - Non-string URL",
+        "data": {
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "image_url", "image_url": {"url": 12345}},  # Integer!
+                    ],
+                },
+                {"role": "assistant", "content": "Response"},
+            ]
+        },
+        "should_pass": False,
+    },
+]
+
+
+def main():
+    print("=" * 80)
+    print("IMAGE VALIDATION TEST")
+    print("=" * 80)
+    print()
+
+    passed = 0
+    failed = 0
+
+    for test in test_cases:
+        name = test["name"]
+        data = test["data"]
+        should_pass = test["should_pass"]
+
+        try:
+            example = coerce_example(data)
+            is_valid, error = validate_vision_example(example, require_images=True)
+
+            if should_pass:
+                if is_valid:
+                    print(f"✅ PASS: {name}")
+                    print(f"   → Correctly accepted valid example")
+                    passed += 1
+                else:
+                    print(f"❌ FAIL: {name}")
+                    print(f"   → Should pass but got error: {error}")
+                    failed += 1
+            else:
+                if not is_valid:
+                    print(f"✅ PASS: {name}")
+                    print(f"   → Correctly rejected: {error}")
+                    passed += 1
+                else:
+                    print(f"❌ FAIL: {name}")
+                    print(f"   → Should fail but passed validation")
+                    failed += 1
+        except Exception as exc:
+            if should_pass:
+                print(f"❌ FAIL: {name}")
+                print(f"   → Unexpected exception: {exc}")
+                failed += 1
+            else:
+                print(f"✅ PASS: {name}")
+                print(f"   → Correctly raised exception: {exc}")
+                passed += 1
+
+        print()
+
+    print("=" * 80)
+    print(f"RESULTS: {passed}/{len(test_cases)} passed, {failed}/{len(test_cases)} failed")
+    print("=" * 80)
+
+    if failed == 0:
+        print("🎉 All tests passed!")
+        return 0
+    else:
+        print(f"⚠️ {failed} test(s) failed")
+        return 1
+
+
+if __name__ == "__main__":
+    exit(main())
+
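Note: for readers skimming the hunk above, a minimal sketch of calling the same validator on a single record outside the test harness. It uses only the `coerce_example` and `validate_vision_example` helpers that the script imports from `synth_ai.learning.sft.data`; the record itself is illustrative.

```python
# Minimal sketch: validate one vision SFT record with the helpers used by the script above.
# The record below is illustrative; only coerce_example / validate_vision_example come from this diff.
from synth_ai.learning.sft.data import coerce_example, validate_vision_example

record = {
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is shown here?"},
                {"type": "image_url", "image_url": {"url": "https://example.com/photo.jpg"}},
            ],
        },
        {"role": "assistant", "content": "A photo."},
    ]
}

example = coerce_example(record)
is_valid, error = validate_vision_example(example, require_images=True)
print(is_valid, error)  # is_valid should be True for a well-formed record
```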
examples/qwen_vl/test_sft_vision_data.py
ADDED
@@ -0,0 +1,110 @@
+"""Generate test vision SFT dataset for Qwen3-VL-2B."""
+
+import base64
+import json
+from pathlib import Path
+from io import BytesIO
+
+try:
+    from PIL import Image
+except ImportError:
+    print("❌ PIL not available")
+    exit(1)
+
+BASE_DIR = Path(__file__).resolve().parent
+
+def create_test_image(color: str) -> str:
+    """Create a 64x64 colored square and return base64 data URL."""
+    colors = {
+        "red": (255, 0, 0),
+        "blue": (0, 0, 255),
+        "green": (0, 255, 0),
+        "yellow": (255, 255, 0),
+        "purple": (128, 0, 128),
+    }
+
+    img = Image.new('RGB', (64, 64), color=colors[color])
+    buffer = BytesIO()
+    img.save(buffer, format='PNG')
+    b64 = base64.b64encode(buffer.getvalue()).decode('utf-8')
+    return f"data:image/png;base64,{b64}"
+
+
+def main():
+    output_dir = BASE_DIR / "test_data"
+    output_dir.mkdir(parents=True, exist_ok=True)
+
+    output_file = output_dir / "vision_sft_test.jsonl"
+
+    # Create 10 training examples with different colored images
+    examples = []
+    colors = ["red", "blue", "green", "yellow", "purple"]
+
+    for i, color in enumerate(colors):
+        # Simple color identification
+        examples.append({
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "What color is this image? Answer in one word."},
+                        {"type": "image_url", "image_url": {"url": create_test_image(color)}},
+                    ],
+                },
+                {
+                    "role": "assistant",
+                    "content": color.capitalize(),
+                },
+            ],
+            "metadata": {"example_id": f"color_{i}", "type": "color_id"},
+        })
+
+        # Describe the image
+        examples.append({
+            "messages": [
+                {
+                    "role": "user",
+                    "content": [
+                        {"type": "text", "text": "Describe this image briefly."},
+                        {"type": "image_url", "image_url": {"url": create_test_image(color)}},
+                    ],
+                },
+                {
+                    "role": "assistant",
+                    "content": f"This is a {color} colored square image.",
+                },
+            ],
+            "metadata": {"example_id": f"describe_{i}", "type": "description"},
+        })
+
+    # Write JSONL
+    with output_file.open("w", encoding="utf-8") as f:
+        for example in examples:
+            f.write(json.dumps(example) + "\n")
+
+    print(f"✅ Created {len(examples)} vision SFT examples")
+    print(f"   Output: {output_file}")
+    print(f"   Size: {output_file.stat().st_size / 1024:.1f} KB")
+
+    # Validate with SDK
+    try:
+        from synth_ai.learning.sft.data import load_jsonl, validate_vision_example
+
+        loaded = load_jsonl(output_file, min_messages=1)
+        print(f"   Loaded: {len(loaded)} examples")
+
+        valid_count = 0
+        for ex in loaded:
+            is_valid, error = validate_vision_example(ex, require_images=True)
+            if is_valid:
+                valid_count += 1
+            else:
+                print(f"   ⚠️ Invalid example: {error}")
+
+        print(f"   Valid: {valid_count}/{len(loaded)}")
+    except ImportError:
+        print("   (SDK validation skipped - synth_ai not available)")
+
+
+if __name__ == "__main__":
+    main()
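Note: a quick, hedged sketch of inspecting the JSONL the script above writes (it writes to `test_data/vision_sft_test.jsonl` next to itself). The sketch uses only the standard library and assumes the script has already been run from the repository root.

```python
# Sketch: peek at the generated vision SFT JSONL without the SDK.
# Assumes examples/qwen_vl/test_sft_vision_data.py has already been run.
import json
from pathlib import Path

path = Path("examples/qwen_vl/test_data/vision_sft_test.jsonl")
with path.open(encoding="utf-8") as f:
    rows = [json.loads(line) for line in f if line.strip()]

print(f"{len(rows)} examples")
for row in rows[:2]:
    user_parts = row["messages"][0]["content"]
    urls = [p["image_url"]["url"] for p in user_parts if p.get("type") == "image_url"]
    # Every image in this dataset is an inline base64 PNG data URL.
    print(row["metadata"]["example_id"], all(u.startswith("data:image/png;base64,") for u in urls))
```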
examples/rl/README.md
CHANGED
@@ -5,8 +5,8 @@ This example trains a reinforcement learning policy on single-step math problems
 ## Quick Commands
 
 ```bash
-# Serve locally with tracing
-uvx synth-ai
+# Serve locally with tracing (uvicorn runtime)
+uvx synth-ai deploy --runtime uvicorn math-single-step --port 8101 --env-file examples/rl/.env --trace traces/math
 
 # Modal deployment
 uvx synth-ai deploy --name synth-math-single-step --env-file examples/rl/.env
@@ -45,14 +45,14 @@ The task app is defined in `synth_ai/task/apps/math_single_step.py` and register
 - `-0.5` if the tool call omits an answer or uses the wrong tool
 - `-1.0` when no tool call is provided
 
-
+Run locally (uvicorn runtime) with tracing to capture trajectories:
 
 ```bash
-uvx synth-ai
+uvx synth-ai deploy --runtime uvicorn math-single-step \
   --port 8101 \
   --env-file examples/rl/.env \
   --trace traces/math \
-  --trace-db traces/math/
+  --trace-db traces/math/task_app_traces_<timestamp>.db
 ```
 
 Deploy or serve on Modal using the same env file; the registration includes a `ModalDeploymentConfig` that installs the `datasets` package automatically.
@@ -162,7 +162,7 @@ For broader background on Synth task apps, CLI commands, and tracing, see the ne
 
 
 uv run python examples/rl/run_eval.py --toml examples/rl/configs/eval_base_qwen.toml
-uvx synth-ai
+uvx synth-ai deploy --runtime uvicorn math-single-step \
   --port 8101 \
   --env-file examples/rl/.env \
   --trace traces/math \
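Note: the README hunk above describes the single-step reward scheme (-0.5 and -1.0 penalties; the RL configs below set `reward_positive = 1.0`). A hypothetical sketch of that scoring logic follows; the function name, tool name, and the value for an incorrect answer are assumptions, not the task app's actual API.

```python
# Hypothetical sketch of the single-step math reward described in the README hunk above.
# reward_positive / reward_negative_* mirror the values in the RL configs below;
# the function, tool name, and incorrect-answer value are illustrative assumptions.
def score_turn(tool_name: str | None, answer: str | None, is_correct: bool) -> float:
    if tool_name is None:
        return -1.0  # no tool call is provided
    if tool_name != "submit_answer" or not answer:
        return -0.5  # wrong tool, or tool call omits an answer
    return 1.0 if is_correct else 0.0  # reward_positive = 1.0; incorrect-answer value not shown in the diff
```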
examples/rl/configs/eval_base_qwen.toml
ADDED
@@ -0,0 +1,17 @@
+type = "rl"
+
+provider = "synth"
+task_app_url = "http://localhost:8101"
+model = "Qwen/Qwen3-1.7B"
+split = "validation"
+num_episodes = 50
+seed_start = 0
+
+[policy]
+inference_url = "https://agent-learning.onrender.com/api/inference"
+max_tokens = 128
+temperature = 0.0
+
+# Optionally supply custom headers
+# [policy.headers]
+# Authorization = "Bearer ..."
examples/rl/configs/eval_rl_qwen.toml
ADDED
@@ -0,0 +1,13 @@
+type = "rl"
+
+provider = "synth"
+task_app_url = "https://your-math-task.modal.run"
+model = "rl:REPLACE_WITH_JOB_ID"
+split = "test"
+num_episodes = 200
+seed_start = 100000
+
+[policy]
+inference_url = "https://your-inference-host"
+max_tokens = 128
+temperature = 0.0
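Note: a small illustrative sketch of reading one of these eval configs with Python's standard `tomllib` (3.11+). The real consumer is `examples/rl/run_eval.py`, added in this release, whose parsing logic is not shown in this diff.

```python
# Illustrative only: read eval_base_qwen.toml with the standard library (Python 3.11+).
# How examples/rl/run_eval.py actually parses it is not shown in this diff.
import tomllib
from pathlib import Path

cfg = tomllib.loads(Path("examples/rl/configs/eval_base_qwen.toml").read_text())
assert cfg["type"] == "rl"
print(cfg["model"], cfg["num_episodes"], cfg["policy"]["inference_url"])
```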
examples/rl/configs/rl_from_base_qwen.toml
ADDED
@@ -0,0 +1,62 @@
+[algorithm]
+type = "online"
+method = "policy_gradient"
+variety = "gspo"
+
+[services]
+task_url = "https://your-math-task.modal.run"
+
+[model]
+base = "Qwen/Qwen3-4B"
+trainer_mode = "full"
+label = "math-single-step-qwen3-4b"
+
+[policy]
+model = "Qwen/Qwen3-4B"
+inference_url = "https://your-inference-host"
+max_tokens = 128
+temperature = 0.0
+
+[data]
+split = "train"
+seed_start = 0
+episodes_per_iteration = 2048
+evaluation_split = "validation"
+evaluation_episodes = 256
+
+[training]
+num_epochs = 1
+iterations_per_epoch = 20
+max_turns = 1
+ops = ["agent", "env"]
+batch_size = 128
+group_size = 1024
+reward_positive = 1.0
+reward_negative_no_tool = -1.0
+reward_negative_no_answer = -0.5
+learning_rate = 5e-6
+
+[compute]
+gpu_type = "A10G"
+gpu_count = 4
+
+[topology]
+type = "single_node_split"
+gpus_for_vllm = 2
+gpus_for_training = 2
+gpus_for_ref = 0
+tensor_parallel = 1
+
+[rollout]
+env_name = "math"
+policy_name = "math-single-step"
+max_turns = 1
+episodes_per_batch = 256
+
+[evaluation]
+instances = 256
+every_n_iters = 10
+seeds = [0, 1, 2, 3, 4]
+
+[tags]
+experiment = "math_single_step"
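Note: a quick sanity-check sketch for the `[topology]` split in the config above; the per-role GPU counts sum to `[compute].gpu_count` here (2 + 2 + 0 = 4). Treating that sum as a hard constraint is an inference from the values in this diff, not a documented rule.

```python
# Sketch: check that the topology split accounts for every GPU in [compute].
# Values copied from rl_from_base_qwen.toml above; the "must sum" rule is an assumption.
compute = {"gpu_type": "A10G", "gpu_count": 4}
topology = {"gpus_for_vllm": 2, "gpus_for_training": 2, "gpus_for_ref": 0}

used = sum(topology.values())
assert used == compute["gpu_count"], f"{used} GPUs assigned vs {compute['gpu_count']} available"
```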
examples/rl/configs/rl_from_base_qwen17.toml
ADDED
@@ -0,0 +1,79 @@
+[algorithm]
+type = "online"
+method = "policy_gradient"
+variety = "gspo"
+
+[services]
+task_url = "http://localhost:8101"
+
+[model]
+base = "Qwen/Qwen3-1.7B"
+trainer_mode = "full"
+label = "math-single-step-qwen3-1.7b"
+
+[policy]
+model = "Qwen/Qwen3-1.7B"
+inference_url = "https://agent-learning.onrender.com/api/inference"
+max_tokens = 1028
+temperature = 0.2
+
+[data]
+split = "train"
+seed_start = 0
+episodes_per_iteration = 1280  # 8 per group * 4 groups per batch * 2 batches per step * 20 steps
+evaluation_split = "validation"
+evaluation_episodes = 50
+
+[training]
+num_epochs = 1
+iterations_per_epoch = 20
+max_turns = 1
+ops = ["agent", "env"]
+batch_size = 2
+group_size = 16
+reward_positive = 1.0
+reward_negative_no_tool = -1.0
+reward_negative_no_answer = -0.5
+learning_rate = 5e-6
+log_interval = 1
+weight_sync_interval = 1
+
+[training.weight_sync]
+enable = true
+targets = ["policy"]
+
+[compute]
+gpu_type = "H100"
+gpu_count = 4
+
+[topology]
+type = "single_node_split"
+gpus_for_vllm = 2
+gpus_for_training = 1
+gpus_for_ref = 1
+tensor_parallel = 1
+
+[vllm]
+tensor_parallel_size = 1
+max_model_len = 4096
+
+[reference]
+placement = "dedicated"
+port = 8002
+tp = 1
+health_max_wait_s = 180
+health_interval_ms = 300
+
+[rollout]
+env_name = "math"
+policy_name = "math-single-step"
+max_turns = 1
+episodes_per_batch = 32  # group_size * batch_size
+
+[evaluation]
+instances = 32
+every_n_iters = 10
+seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
+
+[tags]
+experiment = "math_single_step_qwen17"
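Note: the inline comments in the config above spell out its batch arithmetic; the sketch below simply restates those comments (`episodes_per_batch = group_size * batch_size`, i.e. 16 * 2 = 32, and the 1280 episodes-per-iteration figure from the config's own comment). No additional relationship between the two numbers is claimed.

```python
# Restating the arithmetic from rl_from_base_qwen17.toml's own comments.
group_size = 16
batch_size = 2
episodes_per_batch = group_size * batch_size  # 32, matching the [rollout] value above

# Per the config comment: 8 per group * 4 groups per batch * 2 batches per step * 20 steps
episodes_per_iteration = 8 * 4 * 2 * 20  # 1280, matching [data].episodes_per_iteration
print(episodes_per_batch, episodes_per_iteration)
```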
examples/rl/configs/rl_from_ft_qwen.toml
ADDED
@@ -0,0 +1,37 @@
+type = "rl"
+
+[services]
+task_url = "https://your-math-task.modal.run"
+
+[model]
+source = "ft:REPLACE_WITH_MODEL_ID"
+
+[policy]
+model = "ft:REPLACE_WITH_MODEL_ID"
+inference_url = "https://your-inference-host"
+max_tokens = 128
+temperature = 0.0
+
+[data]
+split = "train"
+seed_start = 0
+episodes_per_iteration = 2048
+evaluation_split = "validation"
+evaluation_episodes = 256
+
+[training]
+max_turns = 1
+ops = ["agent", "env"]
+batch_size = 128
+group_size = 1024
+reward_positive = 1.0
+reward_negative_no_tool = -1.0
+reward_negative_no_answer = -0.5
+learning_rate = 5e-6
+
+[compute]
+gpu_type = "A10G"
+gpu_count = 4
+
+[tags]
+experiment = "math_single_step_from_fft"