synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/README.md +1 -0
- examples/analyze_semantic_words.sh +2 -2
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
- examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -2
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +6 -6
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +79 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +6 -6
- examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
- examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
- examples/sft/evaluate.py +2 -4
- examples/sft/export_dataset.py +7 -4
- examples/swe/task_app/README.md +33 -3
- examples/swe/task_app/grpo_swe_mini.py +4 -1
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
- examples/swe/task_app/hosted/inference/openai_client.py +4 -4
- examples/swe/task_app/hosted/policy_routes.py +0 -2
- examples/swe/task_app/hosted/rollout.py +0 -8
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +36 -5
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/README.md +3 -3
- examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
- examples/vlm/crafter_openai_vlm_agent.py +3 -5
- examples/vlm/filter_image_rows.py +1 -1
- examples/vlm/run_crafter_vlm_benchmark.py +2 -2
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +1 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
- examples/warming_up_to_rl/export_trace_sft.py +174 -60
- examples/warming_up_to_rl/readme.md +63 -132
- examples/warming_up_to_rl/run_fft_and_save.py +1 -1
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/run_rl_and_save.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
- synth_ai/__init__.py +44 -30
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +144 -7
- synth_ai/api/train/__init__.py +13 -1
- synth_ai/api/train/builders.py +9 -3
- synth_ai/api/train/cli.py +155 -17
- synth_ai/api/train/config_finder.py +18 -11
- synth_ai/api/train/configs/__init__.py +8 -1
- synth_ai/api/train/configs/rl.py +32 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/api/train/env_resolver.py +13 -10
- synth_ai/auth/credentials.py +119 -0
- synth_ai/cli/__init__.py +61 -69
- synth_ai/cli/_modal_wrapper.py +7 -5
- synth_ai/cli/_typer_patch.py +0 -2
- synth_ai/cli/_validate_task_app.py +22 -4
- synth_ai/cli/commands/__init__.py +17 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/deploy/__init__.py +23 -0
- synth_ai/cli/commands/deploy/core.py +614 -0
- synth_ai/cli/commands/deploy/errors.py +72 -0
- synth_ai/cli/commands/deploy/validation.py +11 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1109 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +388 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +73 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +199 -0
- synth_ai/cli/commands/train/judge_validation.py +304 -0
- synth_ai/cli/commands/train/validation.py +443 -0
- synth_ai/cli/demo.py +2 -162
- synth_ai/cli/deploy/__init__.py +28 -0
- synth_ai/cli/deploy/core.py +5 -0
- synth_ai/cli/deploy/errors.py +23 -0
- synth_ai/cli/deploy/validation.py +5 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +3 -1
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/recent.py +2 -1
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +21 -0
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +110 -1499
- synth_ai/cli/traces.py +1 -1
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +1 -1
- synth_ai/cli/watch.py +1 -1
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +702 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +0 -1
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/evals/base.py +16 -5
- synth_ai/evals/client.py +1 -1
- synth_ai/http.py +8 -22
- synth_ai/inference/client.py +1 -1
- synth_ai/judge_schemas.py +4 -5
- synth_ai/learning/client.py +1 -1
- synth_ai/learning/health.py +1 -1
- synth_ai/learning/jobs.py +1 -1
- synth_ai/learning/rl/client.py +4 -2
- synth_ai/learning/rl/env_keys.py +1 -1
- synth_ai/learning/rl/secrets.py +1 -1
- synth_ai/learning/sft/client.py +1 -1
- synth_ai/learning/sft/data.py +407 -4
- synth_ai/learning/validators.py +4 -1
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +469 -0
- synth_ai/streaming/streamer.py +301 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/apps/__init__.py +4 -2
- synth_ai/task/config.py +6 -4
- synth_ai/task/rubrics/__init__.py +1 -2
- synth_ai/task/rubrics/loaders.py +14 -10
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/trace_correlation_helpers.py +24 -11
- synth_ai/task/tracing_utils.py +14 -3
- synth_ai/task/validators.py +0 -1
- synth_ai/tracing_v3/abstractions.py +3 -3
- synth_ai/tracing_v3/config.py +15 -13
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +3 -1
- synth_ai/tracing_v3/decorators.py +10 -7
- synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/session_tracer.py +7 -7
- synth_ai/tracing_v3/storage/base.py +29 -29
- synth_ai/tracing_v3/storage/config.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +8 -9
- synth_ai/tracing_v3/turso/native_manager.py +80 -72
- synth_ai/tracing_v3/utils.py +2 -2
- synth_ai/utils/__init__.py +101 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/cli.py +131 -0
- synth_ai/utils/env.py +294 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/modal.py +308 -0
- synth_ai/utils/process.py +212 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/v0/config/__init__.py +1 -5
- synth_ai/v0/config/base_url.py +1 -7
- synth_ai/v0/tracing/config.py +1 -1
- synth_ai/v0/tracing/decorators.py +1 -1
- synth_ai/v0/tracing/upload.py +1 -1
- synth_ai/v0/tracing_v1/config.py +1 -1
- synth_ai/v0/tracing_v1/decorators.py +1 -1
- synth_ai/v0/tracing_v1/upload.py +1 -1
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
- synth_ai/cli/man.py +0 -106
- synth_ai/cli/tui.py +0 -57
- synth_ai/compound/cais.py +0 -0
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/handshake.py +0 -109
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -906
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
# Filter vision traces for SFT training
# Applies quality filters and exports to SFT JSONL format
# NOTE(review): key semantics below are inferred from key names and inline
# comments; confirm against the filter command implementation before relying
# on them.

[filter]
input_db = "traces/gpt4omini_vision/rollouts.db"
output_dir = "traces/gpt4omini_vision/sft"

# Quality filters
min_steps_per_episode = 5  # Remove very short episodes
min_achievements_per_episode = 0  # Allow any achievement count (even 0)
max_steps_per_episode = 50  # Cap maximum length

# Behavioral filters
detect_loops = true  # Detect if agent got stuck
max_repeated_actions = 5  # Max same action in a row
min_unique_states = 3  # Require at least 3 unique states

# Remove episodes with errors
filter_errors = true
filter_timeouts = true

# Export format
export_format = "sft_jsonl"  # OpenAI-style messages format
include_images = true  # Keep base64 images in messages
include_metadata = true  # Keep episode/step metadata

# SFT-specific processing
[sft]
max_sequence_length = 2048  # Truncate messages if longer
deduplicate = true  # Remove duplicate state-action pairs
shuffle = true  # Shuffle samples for training

# Keep only high-quality tool calls
require_valid_tool_calls = true
filter_empty_responses = true

# Train/val split
[split]
enabled = true
val_fraction = 0.1  # 10% of episodes held out for validation
random_seed = 42
stratify_by = "achievements"  # Ensure val set has similar achievement distribution

# Output file names (written inside [filter].output_dir)
train_file = "train.jsonl"
val_file = "val.jsonl"

# Statistics
[output]
save_stats = true
stats_file = "filter_stats.json"
save_filtered_episode_ids = true  # Record which episode IDs were dropped
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# SFT Training Config for Qwen3-VL-2B with Vision Data
# Test config for validating vision fine-tuning pipeline
# (intentionally small: 1 GPU, short sequences, validation disabled)

[algorithm]
type = "offline"
method = "sft"
variety = "lora"

[job]
model = "Qwen/Qwen3-VL-2B-Instruct"
data = "examples/qwen_vl/test_data/vision_sft_test.jsonl"

[compute]
gpu_type = "H100"
gpu_count = 1
nodes = 1

[training]
mode = "lora"
use_qlora = false  # Use full precision LoRA for vision

[training.validation]
enabled = false  # Skip validation for quick test

[hyperparameters]
n_epochs = 2  # 2 epochs for test
train_kind = "peft"
per_device_batch = 1
gradient_accumulation_steps = 4  # effective batch size = 1 x 4 = 4
sequence_length = 2048  # Shorter for vision + text
learning_rate = 5e-5
warmup_ratio = 0.03
lora_rank = 16
lora_alpha = 32
lora_dropout = 0.05
# Includes mm_projector so the vision-to-language projection is adapted too.
lora_target_modules = ["q_proj", "v_proj", "k_proj", "o_proj", "gate_proj", "up_proj", "down_proj", "mm_projector"]

[hyperparameters.parallelism]
use_deepspeed = false
fsdp = false
bf16 = true  # bf16 on, fp16 off (H100 supports bfloat16 natively)
fp16 = false
activation_checkpointing = false

[model_config]
supports_vision = true
max_images_per_message = 1
max_model_len = 2048  # Short for test; must cover hyperparameters.sequence_length

[tags]
experiment = "test_vision_sft"
purpose = "integration_test"
model_size = "2B"
data_type = "synthetic_vision"
|
|
@@ -0,0 +1,308 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""
|
|
3
|
+
Crafter agent using OpenAI's gpt-5-nano vision model.
|
|
4
|
+
|
|
5
|
+
This demonstrates gpt-5-nano playing Crafter with image observations.
|
|
6
|
+
The CrafterPolicy automatically detects vision capability from the "gpt-5"
|
|
7
|
+
model name and includes base64-encoded PNG frames in the prompt.
|
|
8
|
+
|
|
9
|
+
Requirements:
|
|
10
|
+
- `OPENAI_API_KEY` environment variable
|
|
11
|
+
- `openai` Python package (installed via project dependencies)
|
|
12
|
+
|
|
13
|
+
Usage:
|
|
14
|
+
uv run python examples/qwen_vl/crafter_gpt5nano_agent.py \
|
|
15
|
+
--model gpt-5-nano --seeds 10 --steps 20
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
from __future__ import annotations
|
|
19
|
+
|
|
20
|
+
import argparse
|
|
21
|
+
import asyncio
|
|
22
|
+
import base64
|
|
23
|
+
import json
|
|
24
|
+
import os
|
|
25
|
+
from contextlib import suppress
|
|
26
|
+
from pathlib import Path
|
|
27
|
+
from typing import Any
|
|
28
|
+
from uuid import uuid4
|
|
29
|
+
|
|
30
|
+
from examples.task_apps.crafter.task_app.synth_envs_hosted.envs.crafter.environment import (
|
|
31
|
+
CrafterEnvironmentWrapper,
|
|
32
|
+
)
|
|
33
|
+
from examples.task_apps.crafter.task_app.synth_envs_hosted.envs.crafter.policy import CrafterPolicy
|
|
34
|
+
from openai import OpenAI
|
|
35
|
+
from synth_ai.environments.examples.crafter_classic.environment import CrafterClassicEnvironment
|
|
36
|
+
from synth_ai.environments.examples.crafter_classic.taskset import (
|
|
37
|
+
CrafterTaskInstance,
|
|
38
|
+
CrafterTaskInstanceMetadata,
|
|
39
|
+
)
|
|
40
|
+
from synth_ai.environments.tasks.core import Impetus, Intent
|
|
41
|
+
|
|
42
|
+
# Default root for saved frames and summaries (overridable via --output-dir).
DEFAULT_OUTPUT = Path("examples/qwen_vl/temp")
# Per-seed PNG frames are written under <output-dir>/<FRAME_SUBDIR>/seed_XXXX/.
FRAME_SUBDIR = "gpt5nano_frames"
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class EpisodeResult:
    """Accumulates statistics for one Crafter episode (one seed)."""

    def __init__(self, seed: int) -> None:
        self.seed = seed
        self.steps_taken: int = 0
        self.achievements: set[str] = set()
        self.total_reward: float = 0.0
        self.tool_calls: int = 0

    def record_observation(self, observation: dict[str, Any]) -> None:
        """Fold unlocked achievements and the last-step reward from one packet.

        Non-dict packets (or packets without a dict ``observation`` payload)
        are ignored silently.
        """
        inner = observation.get("observation") if isinstance(observation, dict) else None
        if not isinstance(inner, dict):
            return
        status = inner.get("achievements_status")
        if isinstance(status, dict):
            # Keep every achievement whose flag is truthy.
            self.achievements.update(str(key) for key, flag in status.items() if flag)
        step_reward = inner.get("reward_last_step")
        if isinstance(step_reward, (int, float)):
            self.total_reward += float(step_reward)
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _ensure_client() -> OpenAI:
|
|
69
|
+
"""Initialize OpenAI client."""
|
|
70
|
+
api_key = os.getenv("OPENAI_API_KEY")
|
|
71
|
+
if not api_key:
|
|
72
|
+
raise RuntimeError("OPENAI_API_KEY must be set for OpenAI calls")
|
|
73
|
+
return OpenAI(api_key=api_key)
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def _build_task_instance(seed: int) -> CrafterTaskInstance:
    """Assemble a reproducible Crafter task instance for *seed*.

    Radius counts are zero (no guaranteed trees/cows/hostiles near spawn);
    world shape and episode length are pinned via ``task.config``.
    """
    task = CrafterTaskInstance(
        id=uuid4(),
        impetus=Impetus(instructions="Explore, survive, and unlock achievements."),
        intent=Intent(
            rubric={"goal": "Maximise Crafter achievements."},
            gold_trajectories=None,
            gold_state_diff={},
        ),
        metadata=CrafterTaskInstanceMetadata(
            difficulty="custom",
            seed=seed,
            num_trees_radius=0,
            num_cows_radius=0,
            num_hostiles_radius=0,
        ),
        is_reproducible=True,
        initial_engine_snapshot=None,
    )
    task.config = {"seed": seed, "length": 256, "area": [64, 64]}
    return task
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _decode_and_save_image(observation: dict[str, Any], path: Path) -> None:
|
|
104
|
+
"""Extract and save PNG frame from observation."""
|
|
105
|
+
obs = observation.get("observation") if isinstance(observation, dict) else None
|
|
106
|
+
if not isinstance(obs, dict):
|
|
107
|
+
return
|
|
108
|
+
base64_data = obs.get("observation_image_base64")
|
|
109
|
+
if not isinstance(base64_data, str) or not base64_data:
|
|
110
|
+
return
|
|
111
|
+
path.parent.mkdir(parents=True, exist_ok=True)
|
|
112
|
+
with suppress(Exception):
|
|
113
|
+
path.write_bytes(base64.b64decode(base64_data))
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def _normalise_openai_request(payload: dict[str, Any], model: str, temperature: float) -> dict[str, Any]:
|
|
117
|
+
"""Normalize inference request for OpenAI API."""
|
|
118
|
+
request = dict(payload)
|
|
119
|
+
request["model"] = model
|
|
120
|
+
|
|
121
|
+
# Remove vendor-specific knobs unsupported by OpenAI
|
|
122
|
+
request.pop("stop_after_tool_calls", None)
|
|
123
|
+
request.pop("thinking_mode", None)
|
|
124
|
+
request.pop("thinking_budget", None)
|
|
125
|
+
|
|
126
|
+
# gpt-5 models have specific requirements
|
|
127
|
+
if "gpt-5" in model.lower():
|
|
128
|
+
# gpt-5-nano only supports temperature=1 (default)
|
|
129
|
+
request.pop("temperature", None) # Remove custom temperature
|
|
130
|
+
request.setdefault("max_completion_tokens", 512)
|
|
131
|
+
request.pop("max_tokens", None) # Remove if present
|
|
132
|
+
else:
|
|
133
|
+
# Older models use max_tokens and support custom temperature
|
|
134
|
+
request.setdefault("temperature", temperature)
|
|
135
|
+
max_completion = request.pop("max_completion_tokens", None)
|
|
136
|
+
if max_completion is not None:
|
|
137
|
+
request["max_tokens"] = max_completion
|
|
138
|
+
else:
|
|
139
|
+
request.setdefault("max_tokens", 512)
|
|
140
|
+
|
|
141
|
+
return request
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
async def _run_episode(
    *,
    seed: int,
    client: OpenAI,
    model: str,
    max_steps: int,
    output_dir: Path,
    temperature: float,
) -> EpisodeResult:
    """Run a single Crafter episode with gpt-5-nano.

    Drives the observe -> policy -> OpenAI -> environment loop for up to
    ``max_steps`` steps, saving one PNG frame per step under
    ``output_dir / FRAME_SUBDIR / seed_XXXX`` and accumulating statistics.

    Args:
        seed: Crafter world seed; also names the frame sub-directory.
        client: Initialised OpenAI client used for chat completions.
        model: OpenAI model name; gpt-5* triggers the vision prompt path.
        max_steps: Upper bound on environment steps per episode.
        output_dir: Root directory for saved frames.
        temperature: Sampling temperature (dropped for gpt-5* models).

    Returns:
        EpisodeResult with steps taken, achievements, reward, and tool-call count.

    Raises:
        RuntimeError: if the environment wrapper returns a non-dict response.
    """
    task_instance = _build_task_instance(seed)
    env = CrafterClassicEnvironment(task_instance)
    wrapper = CrafterEnvironmentWrapper(env, seed=seed)

    # Policy will auto-detect vision from model name (gpt-5*)
    policy = CrafterPolicy(inference_url="openai://chat-completions", model=model)
    await policy.initialize({"use_tools": True, "model": model})

    episode_result = EpisodeResult(seed=seed)

    observation_packet = await wrapper.initialize()
    episode_result.record_observation(observation_packet)

    frames_root = output_dir / FRAME_SUBDIR / f"seed_{seed:04d}"
    # Save the initial frame before any action is taken.
    _decode_and_save_image(observation_packet, frames_root / "step_000.png")

    for step_idx in range(max_steps):
        obs_dict = observation_packet.get("observation")
        if not isinstance(obs_dict, dict):
            break

        # Format observation text (private policy helper, hence the SLF001 noqa)
        obs_text = policy._format_observation_for_llm(observation_packet)  # noqa: SLF001

        # Ask the policy to assemble the inference request for this observation.
        tool_calls, meta = await policy.step(
            observation_text=obs_text,
            metadata={"raw_observation": observation_packet},
        )
        if "inference_request" not in meta:
            # Policy produced nothing to send; end the episode.
            break

        episode_result.steps_taken += 1
        inference_request = _normalise_openai_request(
            meta["inference_request"],
            model=model,
            temperature=temperature,
        )

        # Call OpenAI API. NOTE: this is the synchronous client inside an
        # async function, so it blocks the event loop; acceptable here since
        # episodes run one at a time.
        response = client.chat.completions.create(**inference_request)
        response_dict = response.model_dump()

        # Parse tool calls out of the raw completion dict.
        assistant_tool_calls = CrafterPolicy.parse_response_to_tool_calls(
            response_dict,
            use_tools=policy.use_tools,
        )
        if not assistant_tool_calls:
            print(
                f"Seed {seed}: no tool calls returned by model; ending episode early at step {step_idx}."
            )
            break

        episode_result.tool_calls += len(assistant_tool_calls)

        # Extract assistant message (may legitimately have no text content
        # when the model responds with tool calls only).
        assistant_message = response_dict["choices"][0].get("message") or {}
        assistant_text = assistant_message.get("content")

        # Execute action in environment
        env_response = await wrapper.step(assistant_tool_calls)
        if not isinstance(env_response, dict):
            raise RuntimeError(
                f"Unexpected environment response type: {type(env_response)!r}"
            )
        episode_result.record_observation(env_response)

        # Update policy history (private helper, hence the SLF001 noqa)
        policy._append_assistant_turn(  # noqa: SLF001
            assistant_text,
            assistant_tool_calls,
            env_response,
        )

        # Save frame for this step (step_001.png onwards).
        frame_path = frames_root / f"step_{step_idx + 1:03d}.png"
        _decode_and_save_image(env_response, frame_path)

        if env_response.get("done"):
            break
        observation_packet = env_response

    await wrapper.terminate()
    return episode_result
|
|
239
|
+
|
|
240
|
+
|
|
241
|
+
async def main() -> None:
    """CLI entry point: run Crafter episodes and write a JSON summary.

    Parses command-line options, runs one episode per seed sequentially,
    prints per-seed statistics, and saves an aggregate summary (plus PNG
    frames) under ``--output-dir``.
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--model",
        default="gpt-5-nano",
        help="OpenAI model name (e.g., gpt-5-nano, gpt-4o-mini-2024-07-18)",
    )
    parser.add_argument("--seeds", type=int, default=10, help="Number of random seeds to evaluate")
    parser.add_argument("--steps", type=int, default=20, help="Max steps per seed")
    parser.add_argument("--temperature", type=float, default=0.6, help="Sampling temperature")
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=DEFAULT_OUTPUT,
        help=f"Directory for saved frames and summaries (default: {DEFAULT_OUTPUT})",
    )
    args = parser.parse_args()

    client = _ensure_client()
    results: list[EpisodeResult] = []

    seeds = list(range(args.seeds))
    print(f"Running {len(seeds)} Crafter episodes with model={args.model}")
    # Plain string: was an f-string with no placeholders (ruff F541).
    print("Using OpenAI API\n")

    for seed in seeds:
        result = await _run_episode(
            seed=seed,
            client=client,
            model=args.model,
            max_steps=args.steps,
            output_dir=args.output_dir,
            temperature=args.temperature,
        )
        results.append(result)
        print(
            f"Seed {seed:02d}: steps={result.steps_taken}, "
            f"achievements={len(result.achievements)}, "
            f"tool_calls={result.tool_calls}, reward≈{result.total_reward:.3f}"
        )

    # max(len(results), 1) guards the means against --seeds 0.
    summary = {
        "model": args.model,
        "provider": "openai",
        "episodes": len(results),
        "mean_steps": round(
            sum(res.steps_taken for res in results) / max(len(results), 1), 2
        ),
        "mean_achievements": round(
            sum(len(res.achievements) for res in results) / max(len(results), 1), 2
        ),
        "total_tool_calls": sum(res.tool_calls for res in results),
        "output_dir": str(args.output_dir / FRAME_SUBDIR),
    }

    args.output_dir.mkdir(parents=True, exist_ok=True)
    summary_path = args.output_dir / "gpt5nano_summary.json"
    summary_path.write_text(json.dumps(summary, indent=2), encoding="utf-8")

    print("\nSummary")
    print("-------")
    print(json.dumps(summary, indent=2))
    print(f"\nFrames saved in: {summary['output_dir']}")


if __name__ == "__main__":
    asyncio.run(main())
|
|
308
|
+
|