synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/README.md +1 -0
- examples/analyze_semantic_words.sh +2 -2
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
- examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -2
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +6 -6
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +79 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +6 -6
- examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
- examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
- examples/sft/evaluate.py +2 -4
- examples/sft/export_dataset.py +7 -4
- examples/swe/task_app/README.md +33 -3
- examples/swe/task_app/grpo_swe_mini.py +4 -1
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
- examples/swe/task_app/hosted/inference/openai_client.py +4 -4
- examples/swe/task_app/hosted/policy_routes.py +0 -2
- examples/swe/task_app/hosted/rollout.py +0 -8
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +36 -5
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/README.md +3 -3
- examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
- examples/vlm/crafter_openai_vlm_agent.py +3 -5
- examples/vlm/filter_image_rows.py +1 -1
- examples/vlm/run_crafter_vlm_benchmark.py +2 -2
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +1 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
- examples/warming_up_to_rl/export_trace_sft.py +174 -60
- examples/warming_up_to_rl/readme.md +63 -132
- examples/warming_up_to_rl/run_fft_and_save.py +1 -1
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/run_rl_and_save.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
- synth_ai/__init__.py +44 -30
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +144 -7
- synth_ai/api/train/__init__.py +13 -1
- synth_ai/api/train/builders.py +9 -3
- synth_ai/api/train/cli.py +155 -17
- synth_ai/api/train/config_finder.py +18 -11
- synth_ai/api/train/configs/__init__.py +8 -1
- synth_ai/api/train/configs/rl.py +32 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/api/train/env_resolver.py +13 -10
- synth_ai/auth/credentials.py +119 -0
- synth_ai/cli/__init__.py +61 -69
- synth_ai/cli/_modal_wrapper.py +7 -5
- synth_ai/cli/_typer_patch.py +0 -2
- synth_ai/cli/_validate_task_app.py +22 -4
- synth_ai/cli/commands/__init__.py +17 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/deploy/__init__.py +23 -0
- synth_ai/cli/commands/deploy/core.py +614 -0
- synth_ai/cli/commands/deploy/errors.py +72 -0
- synth_ai/cli/commands/deploy/validation.py +11 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1109 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +388 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +73 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +199 -0
- synth_ai/cli/commands/train/judge_validation.py +304 -0
- synth_ai/cli/commands/train/validation.py +443 -0
- synth_ai/cli/demo.py +2 -162
- synth_ai/cli/deploy/__init__.py +28 -0
- synth_ai/cli/deploy/core.py +5 -0
- synth_ai/cli/deploy/errors.py +23 -0
- synth_ai/cli/deploy/validation.py +5 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +3 -1
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/recent.py +2 -1
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +21 -0
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +110 -1499
- synth_ai/cli/traces.py +1 -1
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +1 -1
- synth_ai/cli/watch.py +1 -1
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +702 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +0 -1
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/evals/base.py +16 -5
- synth_ai/evals/client.py +1 -1
- synth_ai/http.py +8 -22
- synth_ai/inference/client.py +1 -1
- synth_ai/judge_schemas.py +4 -5
- synth_ai/learning/client.py +1 -1
- synth_ai/learning/health.py +1 -1
- synth_ai/learning/jobs.py +1 -1
- synth_ai/learning/rl/client.py +4 -2
- synth_ai/learning/rl/env_keys.py +1 -1
- synth_ai/learning/rl/secrets.py +1 -1
- synth_ai/learning/sft/client.py +1 -1
- synth_ai/learning/sft/data.py +407 -4
- synth_ai/learning/validators.py +4 -1
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +469 -0
- synth_ai/streaming/streamer.py +301 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/apps/__init__.py +4 -2
- synth_ai/task/config.py +6 -4
- synth_ai/task/rubrics/__init__.py +1 -2
- synth_ai/task/rubrics/loaders.py +14 -10
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/trace_correlation_helpers.py +24 -11
- synth_ai/task/tracing_utils.py +14 -3
- synth_ai/task/validators.py +0 -1
- synth_ai/tracing_v3/abstractions.py +3 -3
- synth_ai/tracing_v3/config.py +15 -13
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +3 -1
- synth_ai/tracing_v3/decorators.py +10 -7
- synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/session_tracer.py +7 -7
- synth_ai/tracing_v3/storage/base.py +29 -29
- synth_ai/tracing_v3/storage/config.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +8 -9
- synth_ai/tracing_v3/turso/native_manager.py +80 -72
- synth_ai/tracing_v3/utils.py +2 -2
- synth_ai/utils/__init__.py +101 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/cli.py +131 -0
- synth_ai/utils/env.py +294 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/modal.py +308 -0
- synth_ai/utils/process.py +212 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/v0/config/__init__.py +1 -5
- synth_ai/v0/config/base_url.py +1 -7
- synth_ai/v0/tracing/config.py +1 -1
- synth_ai/v0/tracing/decorators.py +1 -1
- synth_ai/v0/tracing/upload.py +1 -1
- synth_ai/v0/tracing_v1/config.py +1 -1
- synth_ai/v0/tracing_v1/decorators.py +1 -1
- synth_ai/v0/tracing_v1/upload.py +1 -1
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
- synth_ai/cli/man.py +0 -106
- synth_ai/cli/tui.py +0 -57
- synth_ai/compound/cais.py +0 -0
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/handshake.py +0 -109
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -906
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
"""Legacy entrypoint for the math single-step task app."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import argparse
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
|
|
8
|
+
from fastapi.exceptions import RequestValidationError
|
|
9
|
+
from fastapi.responses import JSONResponse
|
|
10
|
+
from starlette.requests import Request
|
|
11
|
+
from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
|
|
12
|
+
from synth_ai.task.server import create_task_app, run_task_app
|
|
13
|
+
|
|
14
|
+
from .math_single_step import build_config
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def fastapi_app():
|
|
18
|
+
"""Return a FastAPI application for hosting the math task app."""
|
|
19
|
+
|
|
20
|
+
app = create_task_app(build_config())
|
|
21
|
+
|
|
22
|
+
# Replace default health endpoints with auth-tolerant handlers.
|
|
23
|
+
filtered_routes = []
|
|
24
|
+
for route in app.router.routes:
|
|
25
|
+
path = getattr(route, "path", None)
|
|
26
|
+
methods = getattr(route, "methods", set()) or set()
|
|
27
|
+
if path in {"/health", "/health/rollout"} and "GET" in methods:
|
|
28
|
+
continue
|
|
29
|
+
filtered_routes.append(route)
|
|
30
|
+
app.router.routes = filtered_routes
|
|
31
|
+
|
|
32
|
+
def _log_env_key_prefix(source: str, env_key: str | None) -> str | None:
|
|
33
|
+
if not env_key:
|
|
34
|
+
return None
|
|
35
|
+
prefix = env_key[: max(1, len(env_key) // 2)]
|
|
36
|
+
print(f"[{source}] expected ENVIRONMENT_API_KEY prefix: {prefix}")
|
|
37
|
+
return prefix
|
|
38
|
+
|
|
39
|
+
@app.get("/health")
|
|
40
|
+
async def health(request: Request):
|
|
41
|
+
env_key = normalize_environment_api_key()
|
|
42
|
+
if not env_key:
|
|
43
|
+
return JSONResponse(
|
|
44
|
+
status_code=503,
|
|
45
|
+
content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
|
|
46
|
+
)
|
|
47
|
+
if not is_api_key_header_authorized(request):
|
|
48
|
+
prefix = _log_env_key_prefix("health", env_key)
|
|
49
|
+
content = {"status": "healthy", "authorized": False}
|
|
50
|
+
if prefix:
|
|
51
|
+
content["expected_api_key_prefix"] = prefix
|
|
52
|
+
return JSONResponse(status_code=200, content=content)
|
|
53
|
+
return {"status": "healthy", "authorized": True}
|
|
54
|
+
|
|
55
|
+
@app.get("/health/rollout")
|
|
56
|
+
async def health_rollout(request: Request):
|
|
57
|
+
env_key = normalize_environment_api_key()
|
|
58
|
+
if not env_key:
|
|
59
|
+
return JSONResponse(
|
|
60
|
+
status_code=503,
|
|
61
|
+
content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
|
|
62
|
+
)
|
|
63
|
+
if not is_api_key_header_authorized(request):
|
|
64
|
+
prefix = _log_env_key_prefix("health/rollout", env_key)
|
|
65
|
+
content = {"status": "healthy", "authorized": False}
|
|
66
|
+
if prefix:
|
|
67
|
+
content["expected_api_key_prefix"] = prefix
|
|
68
|
+
return JSONResponse(status_code=200, content=content)
|
|
69
|
+
return {"ok": True, "authorized": True}
|
|
70
|
+
|
|
71
|
+
@app.exception_handler(RequestValidationError)
|
|
72
|
+
async def _on_validation_error(request: Request, exc: RequestValidationError):
|
|
73
|
+
try:
|
|
74
|
+
hdr = request.headers
|
|
75
|
+
snapshot = {
|
|
76
|
+
"path": str(request.url.path),
|
|
77
|
+
"have_x_api_key": bool(hdr.get("x-api-key")),
|
|
78
|
+
"have_x_api_keys": bool(hdr.get("x-api-keys")),
|
|
79
|
+
"have_authorization": bool(hdr.get("authorization")),
|
|
80
|
+
"errors": exc.errors()[:5],
|
|
81
|
+
}
|
|
82
|
+
print("[422] validation", snapshot, flush=True)
|
|
83
|
+
except Exception:
|
|
84
|
+
pass
|
|
85
|
+
return JSONResponse(
|
|
86
|
+
status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]}
|
|
87
|
+
)
|
|
88
|
+
|
|
89
|
+
return app
|
|
90
|
+
|
|
91
|
+
|
|
92
|
+
if __name__ == "__main__":
|
|
93
|
+
parser = argparse.ArgumentParser(description="Run the math single-step task app locally")
|
|
94
|
+
parser.add_argument("--host", default="0.0.0.0")
|
|
95
|
+
parser.add_argument("--port", type=int, default=8101)
|
|
96
|
+
parser.add_argument("--reload", action="store_true", help="Enable uvicorn autoreload")
|
|
97
|
+
parser.add_argument(
|
|
98
|
+
"--env-file",
|
|
99
|
+
action="append",
|
|
100
|
+
default=[],
|
|
101
|
+
help="Path to .env file to load (can be specified multiple times)",
|
|
102
|
+
)
|
|
103
|
+
args = parser.parse_args()
|
|
104
|
+
|
|
105
|
+
run_task_app(
|
|
106
|
+
build_config,
|
|
107
|
+
host=args.host,
|
|
108
|
+
port=args.port,
|
|
109
|
+
reload=args.reload,
|
|
110
|
+
env_files=args.env_file or [],
|
|
111
|
+
)
|
examples/run_crafter_demo.sh
CHANGED
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
# This script demonstrates a reactive agent in the Crafter environment
|
|
5
5
|
|
|
6
6
|
echo "🚀 Starting Crafter agent demo with Gemini 1.5 Flash..."
|
|
7
|
-
echo "Make sure the synth-ai service is running: uvx synth-ai
|
|
7
|
+
echo "Make sure the synth-ai service is running: uvx synth-ai deploy --runtime uvicorn"
|
|
8
8
|
echo ""
|
|
9
9
|
|
|
10
|
-
uv run python -m synth_ai.environments.examples.crafter_classic.agent_demos.test_crafter_react_agent --model gemini-1.5-flash
|
|
10
|
+
uv run python -m synth_ai.environments.examples.crafter_classic.agent_demos.test_crafter_react_agent --model gemini-1.5-flash
|
examples/sft/README.md
CHANGED
|
@@ -25,9 +25,9 @@ You can generate traces with the Crafter task app and then export them to SFT JS
|
|
|
25
25
|
|
|
26
26
|
```bash
|
|
27
27
|
# Serve the task app locally with tracing enabled (example)
|
|
28
|
-
uvx synth-ai
|
|
28
|
+
uvx synth-ai deploy --runtime uvicorn grpo-crafter \
|
|
29
29
|
--trace traces/v3 \
|
|
30
|
-
--trace-db traces/v3/
|
|
30
|
+
--trace-db traces/v3/task_app_traces_<timestamp>.db \
|
|
31
31
|
--port 8001
|
|
32
32
|
|
|
33
33
|
# Or run traced local rollouts to accumulate data
|
|
@@ -36,9 +36,9 @@ uv run python examples/warming_up_to_rl/run_local_rollout_traced.py \
|
|
|
36
36
|
|
|
37
37
|
# Export SFT dataset from the trace DB
|
|
38
38
|
uv run python examples/warming_up_to_rl/export_trace_sft.py \
|
|
39
|
-
--db traces/v3/
|
|
39
|
+
--db traces/v3/task_app_traces_<timestamp>.db \
|
|
40
40
|
--min-unique 0 \
|
|
41
|
-
--output examples/sft/ft_data/
|
|
41
|
+
--output examples/sft/ft_data/crafter_sft.jsonl
|
|
42
42
|
```
|
|
43
43
|
|
|
44
44
|
Notes:
|
|
@@ -56,7 +56,7 @@ Use the standard CLI. Do not use a custom Python finetuning script. Point the CL
|
|
|
56
56
|
uvx synth-ai train \
|
|
57
57
|
--type sft \
|
|
58
58
|
--config examples/sft/configs/crafter_lora_qwen0p6b.toml \
|
|
59
|
-
--dataset examples/sft/ft_data/
|
|
59
|
+
--dataset examples/sft/ft_data/crafter_sft.jsonl \
|
|
60
60
|
--env-file /Users/joshpurtell/Documents/GitHub/monorepo/backend/.env.dev
|
|
61
61
|
```
|
|
62
62
|
|
|
@@ -76,7 +76,7 @@ Full finetuning updates all weights and uses a near-identical CLI flow with the
|
|
|
76
76
|
uvx synth-ai train \
|
|
77
77
|
--type sft \
|
|
78
78
|
--config examples/sft/configs/crafter_fft_qwen0p6b.toml \
|
|
79
|
-
--dataset examples/sft/ft_data/
|
|
79
|
+
--dataset examples/sft/ft_data/crafter_sft.jsonl \
|
|
80
80
|
--env-file /Users/joshpurtell/Documents/GitHub/monorepo/backend/.env.dev
|
|
81
81
|
```
|
|
82
82
|
|
|
@@ -1,7 +1,12 @@
|
|
|
1
|
+
[algorithm]
|
|
2
|
+
type = "offline"
|
|
3
|
+
method = "sft"
|
|
4
|
+
variety = "fft"
|
|
5
|
+
|
|
1
6
|
[job]
|
|
2
7
|
model = "Qwen/Qwen3-0.6B"
|
|
3
8
|
# Prefer passing --dataset at runtime for repeatability
|
|
4
|
-
# data = "examples/sft/ft_data/
|
|
9
|
+
# data = "examples/sft/ft_data/crafter_sft.jsonl"
|
|
5
10
|
|
|
6
11
|
[compute]
|
|
7
12
|
gpu_type = "H100"
|
|
@@ -11,7 +16,7 @@ nodes = 1
|
|
|
11
16
|
[data]
|
|
12
17
|
topology = {}
|
|
13
18
|
# Optional validation set if you have one locally
|
|
14
|
-
# validation_path = "examples/sft/ft_data/
|
|
19
|
+
# validation_path = "examples/sft/ft_data/crafter_sft.val.jsonl"
|
|
15
20
|
|
|
16
21
|
[training]
|
|
17
22
|
mode = "sft_offline"
|
|
@@ -1,7 +1,12 @@
|
|
|
1
|
+
[algorithm]
|
|
2
|
+
type = "offline"
|
|
3
|
+
method = "sft"
|
|
4
|
+
variety = "qlora"
|
|
5
|
+
|
|
1
6
|
[job]
|
|
2
7
|
model = "Qwen/Qwen3-0.6B"
|
|
3
8
|
# Optionally set here, but prefer passing --dataset at runtime
|
|
4
|
-
# data = "examples/sft/ft_data/
|
|
9
|
+
# data = "examples/sft/ft_data/crafter_sft.jsonl"
|
|
5
10
|
|
|
6
11
|
[compute]
|
|
7
12
|
gpu_type = "H100"
|
|
@@ -12,7 +17,7 @@ nodes = 1
|
|
|
12
17
|
# Forwarded into metadata.effective_config
|
|
13
18
|
topology = {}
|
|
14
19
|
# Optional validation set if you have one locally
|
|
15
|
-
# validation_path = "examples/sft/ft_data/
|
|
20
|
+
# validation_path = "examples/sft/ft_data/crafter_sft.val.jsonl"
|
|
16
21
|
|
|
17
22
|
[training]
|
|
18
23
|
mode = "lora"
|
|
@@ -42,4 +47,3 @@ fsdp = false
|
|
|
42
47
|
bf16 = true
|
|
43
48
|
fp16 = false
|
|
44
49
|
activation_checkpointing = true
|
|
45
|
-
|
examples/sft/evaluate.py
CHANGED
|
@@ -11,6 +11,7 @@ from __future__ import annotations
|
|
|
11
11
|
import argparse
|
|
12
12
|
import asyncio
|
|
13
13
|
import os
|
|
14
|
+
from contextlib import suppress
|
|
14
15
|
from dataclasses import dataclass
|
|
15
16
|
from typing import Any
|
|
16
17
|
|
|
@@ -104,10 +105,8 @@ async def main() -> None:
|
|
|
104
105
|
for r in results:
|
|
105
106
|
ers = r.get("episode_returns") or []
|
|
106
107
|
if isinstance(ers, list) and ers:
|
|
107
|
-
|
|
108
|
+
with suppress(Exception):
|
|
108
109
|
flat_returns.append(float(ers[0]))
|
|
109
|
-
except Exception:
|
|
110
|
-
pass
|
|
111
110
|
if flat_returns:
|
|
112
111
|
mean_ret = sum(flat_returns) / len(flat_returns)
|
|
113
112
|
print(f"mean_return={mean_ret:.3f} over {len(flat_returns)} episodes")
|
|
@@ -116,4 +115,3 @@ async def main() -> None:
|
|
|
116
115
|
if __name__ == "__main__":
|
|
117
116
|
asyncio.run(main())
|
|
118
117
|
|
|
119
|
-
|
examples/sft/export_dataset.py
CHANGED
|
@@ -20,12 +20,17 @@ from examples.warming_up_to_rl.export_trace_sft import (
|
|
|
20
20
|
parse_event_filters,
|
|
21
21
|
write_jsonl,
|
|
22
22
|
)
|
|
23
|
+
from synth_ai.tracing_v3.constants import TRACE_DB_DIR, canonical_trace_db_name
|
|
23
24
|
|
|
24
25
|
|
|
25
26
|
def main() -> None:
|
|
26
27
|
p = argparse.ArgumentParser(description=__doc__)
|
|
27
|
-
p.add_argument(
|
|
28
|
-
|
|
28
|
+
p.add_argument(
|
|
29
|
+
"--db",
|
|
30
|
+
type=Path,
|
|
31
|
+
default=TRACE_DB_DIR / canonical_trace_db_name(),
|
|
32
|
+
)
|
|
33
|
+
p.add_argument("--output", type=Path, default=Path("examples/sft/ft_data/crafter_sft.jsonl"))
|
|
29
34
|
p.add_argument("--model", action="append", dest="models")
|
|
30
35
|
p.add_argument("--provider", action="append", dest="providers")
|
|
31
36
|
p.add_argument("--min-unique", type=int, default=0)
|
|
@@ -113,5 +118,3 @@ def main() -> None:
|
|
|
113
118
|
|
|
114
119
|
if __name__ == "__main__":
|
|
115
120
|
main()
|
|
116
|
-
|
|
117
|
-
|
examples/swe/task_app/README.md
CHANGED
|
@@ -28,17 +28,17 @@ endpoints.
|
|
|
28
28
|
## Using the task app
|
|
29
29
|
|
|
30
30
|
```
|
|
31
|
-
uvx synth-ai
|
|
31
|
+
uvx synth-ai deploy --runtime uvicorn swe-mini --port 8020
|
|
32
32
|
```
|
|
33
33
|
|
|
34
34
|
### Recommended: non-interactive serve + .env
|
|
35
35
|
|
|
36
36
|
```bash
|
|
37
|
-
uvx synth-ai
|
|
37
|
+
uvx synth-ai deploy --runtime uvicorn swe-mini \
|
|
38
38
|
--port 8020 \
|
|
39
39
|
--env-file .env \
|
|
40
40
|
--trace traces/v3 \
|
|
41
|
-
--trace-db traces/v3/
|
|
41
|
+
--trace-db traces/v3/task_app_traces_<timestamp>.db
|
|
42
42
|
```
|
|
43
43
|
|
|
44
44
|
This avoids interactive prompts (useful for CI) and loads `ENVIRONMENT_API_KEY`, `OPENAI_API_KEY`, etc. from `.env`.
|
|
@@ -60,6 +60,36 @@ Execution is handled by mini-swe's environment classes. Configure execution via
|
|
|
60
60
|
`SWE_MINI_ENVIRONMENT_CLASS` (`local`, `docker`, `singularity`, …) and pass
|
|
61
61
|
additional keyword arguments with `SWE_MINI_ENVIRONMENT_KWARGS` (JSON).
|
|
62
62
|
|
|
63
|
+
### Morph Cloud backend
|
|
64
|
+
|
|
65
|
+
The task app now ships with a Morph-powered environment class so you can run
|
|
66
|
+
mini-SWE rollouts in managed sandboxes. When `MORPH_API_KEY` is present the app
|
|
67
|
+
defaults to this backend automatically unless you override
|
|
68
|
+
`SWE_MINI_ENVIRONMENT_CLASS`.
|
|
69
|
+
|
|
70
|
+
1. Install the optional dependencies: `pip install "synth-ai[swe]"`.
|
|
71
|
+
2. Export your API key: `export MORPH_API_KEY=...`.
|
|
72
|
+
3. Point the task app at Morph by setting:
|
|
73
|
+
|
|
74
|
+
```bash
|
|
75
|
+
export SWE_MINI_ENVIRONMENT_CLASS=morph
|
|
76
|
+
export SWE_MINI_ENVIRONMENT_KWARGS='{
|
|
77
|
+
"snapshot_id": "snap_your_pre_baked_swebench_image",
|
|
78
|
+
"cwd": "/workspace/swebench",
|
|
79
|
+
"env": {"PIP_PROGRESS_BAR": "off"},
|
|
80
|
+
"metadata": {"project": "synth-ai", "task": "swe-mini"}
|
|
81
|
+
}'
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
If you do not have a pre-built snapshot, provide `"image_id"` (defaults to
|
|
85
|
+
`morphvm-minimal`) along with resource hints (`"vcpus"`, `"memory_mb"`,
|
|
86
|
+
`"disk_mb"`). You can also set `SWE_MINI_MORPH_SNAPSHOT_ID` globally.
|
|
87
|
+
|
|
88
|
+
During cleanup the backend deletes the remote workspace and stops the Morph
|
|
89
|
+
instance automatically. All shell commands (including submissions) now execute
|
|
90
|
+
inside the Morph sandbox, enabling RL workflows that require persistent remote
|
|
91
|
+
compute.
|
|
92
|
+
|
|
63
93
|
### Tracing & SFT
|
|
64
94
|
|
|
65
95
|
Tracing works the same as Crafter; pass `--trace` / `--trace-db` to the CLI or
|
|
@@ -404,6 +404,10 @@ def _ensure_env_has_task(
|
|
|
404
404
|
if not instance_id:
|
|
405
405
|
raise ValueError("mini-swe rollout request requires env.config.instance_id")
|
|
406
406
|
config["task"] = dataset.get(instance_id)
|
|
407
|
+
env_cfg = dict(config.get("environment") or {})
|
|
408
|
+
if "environment_class" not in env_cfg and os.getenv("MORPH_API_KEY"):
|
|
409
|
+
env_cfg["environment_class"] = "morph"
|
|
410
|
+
config["environment"] = env_cfg
|
|
407
411
|
return env_spec.model_copy(update={"config": config})
|
|
408
412
|
|
|
409
413
|
|
|
@@ -556,7 +560,6 @@ register_task_app(
|
|
|
556
560
|
description="mini-swe-agent task app with rollout + proxy endpoints",
|
|
557
561
|
config_factory=build_config,
|
|
558
562
|
aliases=("mini-swe", "swe-mini-task"),
|
|
559
|
-
env_files=(str(REPO_ROOT / "backend" / ".env.dev"),),
|
|
560
563
|
modal=ModalDeploymentConfig(
|
|
561
564
|
app_name="swe-mini-task-app",
|
|
562
565
|
python_version="3.11",
|
|
@@ -114,23 +114,11 @@ if __name__ == "__main__":
|
|
|
114
114
|
parser.add_argument("--host", default="0.0.0.0")
|
|
115
115
|
parser.add_argument("--port", type=int, default=8020)
|
|
116
116
|
parser.add_argument("--reload", action="store_true", help="Enable uvicorn autoreload")
|
|
117
|
-
parser.add_argument(
|
|
118
|
-
"--env-file",
|
|
119
|
-
action="append",
|
|
120
|
-
default=[],
|
|
121
|
-
help="Additional .env files to load before startup",
|
|
122
|
-
)
|
|
123
117
|
args = parser.parse_args()
|
|
124
118
|
|
|
125
|
-
default_env = Path(__file__).resolve().parents[4] / "backend" / ".env.dev"
|
|
126
|
-
env_files = [str(default_env)] if default_env.exists() else []
|
|
127
|
-
env_files.extend(args.env_file or [])
|
|
128
|
-
|
|
129
119
|
run_task_app(
|
|
130
120
|
build_task_app_config,
|
|
131
121
|
host=args.host,
|
|
132
122
|
port=args.port,
|
|
133
123
|
reload=args.reload,
|
|
134
|
-
env_files=env_files,
|
|
135
124
|
)
|
|
136
|
-
|
|
@@ -46,7 +46,7 @@ class CrafterReActAgent:
|
|
|
46
46
|
"- Always return a single tool call: interact_many({actions: [...]})\n"
|
|
47
47
|
"- Use 2–5 actions per call; prefer long movement sequences to explore.\n"
|
|
48
48
|
"- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n"
|
|
49
|
-
"
|
|
49
|
+
"\n"
|
|
50
50
|
"Available actions: noop, move_up, move_down, move_left, move_right, do (interact), sleep, "
|
|
51
51
|
"place_stone, place_table, place_furnace, place_plant, make_wood_pickaxe, make_stone_pickaxe, "
|
|
52
52
|
"make_iron_pickaxe, make_wood_sword, make_stone_sword, make_iron_sword\n"
|
|
@@ -18,6 +18,7 @@ from typing import Any
|
|
|
18
18
|
from minisweagent.environments import get_environment
|
|
19
19
|
from synth_ai.environments.environment.tools import EnvToolCall
|
|
20
20
|
|
|
21
|
+
from examples.swe.task_app.morph_backend import MorphSandboxBackend
|
|
21
22
|
from .shared import summarise_history
|
|
22
23
|
from .tools import TOOLS_SCHEMA
|
|
23
24
|
|
|
@@ -25,8 +26,9 @@ logger = logging.getLogger(__name__)
|
|
|
25
26
|
|
|
26
27
|
|
|
27
28
|
def _environment_type_from_config(config: dict[str, Any]) -> str:
|
|
29
|
+
default = "morph" if os.getenv("MORPH_API_KEY") else "local"
|
|
28
30
|
value = (config or {}).get("environment_class") or os.getenv(
|
|
29
|
-
"SWE_MINI_ENVIRONMENT_CLASS",
|
|
31
|
+
"SWE_MINI_ENVIRONMENT_CLASS", default
|
|
30
32
|
)
|
|
31
33
|
return str(value).strip() or "local"
|
|
32
34
|
|
|
@@ -91,6 +93,7 @@ class MiniSweEnvironmentWrapper:
|
|
|
91
93
|
self._local_workspace_dir: Path | None = None
|
|
92
94
|
self._remote_workspace: str | None = None
|
|
93
95
|
self._cleanup_workspace = False
|
|
96
|
+
self._using_morph_backend = False
|
|
94
97
|
|
|
95
98
|
if self.environment_type == "local":
|
|
96
99
|
workspace = self._prepare_local_workspace(kwargs)
|
|
@@ -117,11 +120,11 @@ class MiniSweEnvironmentWrapper:
|
|
|
117
120
|
timeout = self.env_config.get("timeout")
|
|
118
121
|
if timeout and "timeout" not in kwargs:
|
|
119
122
|
kwargs["timeout"] = int(timeout)
|
|
120
|
-
if self.repo_url and "image" not in kwargs:
|
|
123
|
+
if self.environment_type in {"docker", "bubblewrap"} and self.repo_url and "image" not in kwargs:
|
|
121
124
|
image = self.metadata.get("image_name") or os.getenv("SWE_MINI_DOCKER_IMAGE")
|
|
122
125
|
if image:
|
|
123
126
|
kwargs["image"] = image
|
|
124
|
-
if self.environment_type in {"docker", "bubblewrap"}:
|
|
127
|
+
if self.environment_type in {"docker", "bubblewrap", "morph"}:
|
|
125
128
|
remote_env = dict(kwargs.get("env") or {})
|
|
126
129
|
remote_env.setdefault("GIT_TERMINAL_PROMPT", "0")
|
|
127
130
|
kwargs["env"] = remote_env
|
|
@@ -131,13 +134,34 @@ class MiniSweEnvironmentWrapper:
|
|
|
131
134
|
self.environment_type,
|
|
132
135
|
kwargs,
|
|
133
136
|
)
|
|
134
|
-
self.
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
137
|
+
if self.environment_type == "morph":
|
|
138
|
+
morph_kwargs = dict(kwargs)
|
|
139
|
+
image_value = morph_kwargs.pop("image", None)
|
|
140
|
+
if image_value and "image_id" not in morph_kwargs:
|
|
141
|
+
morph_kwargs["image_id"] = image_value
|
|
142
|
+
timeout_value = morph_kwargs.pop("timeout", None)
|
|
143
|
+
if timeout_value is not None and "startup_timeout" not in morph_kwargs:
|
|
144
|
+
try:
|
|
145
|
+
morph_kwargs["startup_timeout"] = int(timeout_value)
|
|
146
|
+
except Exception:
|
|
147
|
+
logger.warning("Invalid timeout value for morph backend: %r", timeout_value)
|
|
148
|
+
metadata_override = morph_kwargs.pop("metadata", {}) or {}
|
|
149
|
+
metadata_payload = {
|
|
150
|
+
"app": "swe-mini",
|
|
151
|
+
"instance_id": self.instance_id,
|
|
152
|
+
}
|
|
153
|
+
metadata_payload.update({str(k): str(v) for k, v in dict(metadata_override).items()})
|
|
154
|
+
morph_kwargs["metadata"] = metadata_payload
|
|
155
|
+
self.env = MorphSandboxBackend(**morph_kwargs)
|
|
156
|
+
self._using_morph_backend = True
|
|
157
|
+
else:
|
|
158
|
+
self.env = get_environment(
|
|
159
|
+
{
|
|
160
|
+
"environment_class": self.environment_type,
|
|
161
|
+
**kwargs,
|
|
162
|
+
},
|
|
163
|
+
default_type="local",
|
|
164
|
+
)
|
|
141
165
|
|
|
142
166
|
if self.environment_type != "local":
|
|
143
167
|
self._bootstrap_remote_workspace()
|
|
@@ -181,6 +205,9 @@ class MiniSweEnvironmentWrapper:
|
|
|
181
205
|
with contextlib.suppress(Exception):
|
|
182
206
|
self.env.execute(f"rm -rf {shlex.quote(self._remote_workspace)}")
|
|
183
207
|
self._remote_workspace = None
|
|
208
|
+
if self._using_morph_backend and hasattr(self.env, "close"):
|
|
209
|
+
with contextlib.suppress(Exception):
|
|
210
|
+
self.env.close()
|
|
184
211
|
|
|
185
212
|
def _resolve_repo_url(self, metadata: dict[str, Any]) -> str | None:
|
|
186
213
|
candidates = [
|
|
@@ -776,7 +803,7 @@ class MiniSweEnvironmentWrapper:
|
|
|
776
803
|
or os.getenv("SWE_REX_MODAL_SANDBOX_KWARGS")
|
|
777
804
|
)
|
|
778
805
|
modal_kwargs: dict[str, Any] = {}
|
|
779
|
-
if isinstance(modal_kwargs_raw,
|
|
806
|
+
if isinstance(modal_kwargs_raw, dict | list):
|
|
780
807
|
modal_kwargs = dict(modal_kwargs_raw or {})
|
|
781
808
|
elif isinstance(modal_kwargs_raw, str) and modal_kwargs_raw.strip():
|
|
782
809
|
try:
|
|
@@ -841,9 +868,9 @@ class MiniSweEnvironmentWrapper:
|
|
|
841
868
|
instance_image_tag=instance_image_tag,
|
|
842
869
|
env_image_tag=env_image_tag,
|
|
843
870
|
model_name=model_name,
|
|
844
|
-
|
|
845
|
-
|
|
846
|
-
|
|
871
|
+
command_cls=Command,
|
|
872
|
+
write_file_request_cls=WriteFileRequest,
|
|
873
|
+
read_file_request_cls=ReadFileRequest,
|
|
847
874
|
)
|
|
848
875
|
try:
|
|
849
876
|
return self._run_coroutine_blocking(coro)
|
|
@@ -867,9 +894,9 @@ class MiniSweEnvironmentWrapper:
|
|
|
867
894
|
instance_image_tag: str,
|
|
868
895
|
env_image_tag: str,
|
|
869
896
|
model_name: str,
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
897
|
+
command_cls,
|
|
898
|
+
write_file_request_cls,
|
|
899
|
+
read_file_request_cls,
|
|
873
900
|
) -> dict[str, Any]:
|
|
874
901
|
deployment = deployment_config.get_deployment()
|
|
875
902
|
await deployment.start()
|
|
@@ -880,7 +907,7 @@ class MiniSweEnvironmentWrapper:
|
|
|
880
907
|
|
|
881
908
|
# Ensure working directory exists.
|
|
882
909
|
mkdir_resp = await runtime.execute(
|
|
883
|
-
|
|
910
|
+
command_cls(command=["mkdir", "-p", remote_root], timeout=60, shell=False)
|
|
884
911
|
)
|
|
885
912
|
if mkdir_resp.exit_code not in (0, None):
|
|
886
913
|
logger.warning("Failed to ensure remote directory %s (exit=%s)", remote_root, mkdir_resp.exit_code)
|
|
@@ -888,8 +915,8 @@ class MiniSweEnvironmentWrapper:
|
|
|
888
915
|
# Upload dataset & predictions.
|
|
889
916
|
dataset_blob = json.dumps([instance], ensure_ascii=False)
|
|
890
917
|
predictions_blob = json.dumps({instance_id: prediction}, ensure_ascii=False)
|
|
891
|
-
await runtime.write_file(
|
|
892
|
-
await runtime.write_file(
|
|
918
|
+
await runtime.write_file(write_file_request_cls(path=dataset_remote_path, content=dataset_blob))
|
|
919
|
+
await runtime.write_file(write_file_request_cls(path=predictions_remote_path, content=predictions_blob))
|
|
893
920
|
|
|
894
921
|
eval_cmd = [
|
|
895
922
|
"python",
|
|
@@ -921,7 +948,7 @@ class MiniSweEnvironmentWrapper:
|
|
|
921
948
|
|
|
922
949
|
command_timeout = max(eval_timeout + 900, 1200)
|
|
923
950
|
response = await runtime.execute(
|
|
924
|
-
|
|
951
|
+
command_cls(
|
|
925
952
|
command=eval_cmd,
|
|
926
953
|
timeout=command_timeout,
|
|
927
954
|
cwd=remote_root,
|
|
@@ -945,7 +972,7 @@ class MiniSweEnvironmentWrapper:
|
|
|
945
972
|
for filename in ("report.json", "test_output.txt", "run_instance.log", "patch.diff"):
|
|
946
973
|
remote_path = f"{remote_log_dir}/{filename}"
|
|
947
974
|
try:
|
|
948
|
-
content = await runtime.read_file(
|
|
975
|
+
content = await runtime.read_file(read_file_request_cls(path=remote_path))
|
|
949
976
|
except Exception:
|
|
950
977
|
continue
|
|
951
978
|
if getattr(content, "content", None):
|
|
@@ -1073,7 +1100,7 @@ class MiniSweEnvironmentWrapper:
|
|
|
1073
1100
|
return value
|
|
1074
1101
|
if isinstance(value, str):
|
|
1075
1102
|
return value.strip().lower() in {"1", "true", "yes", "on"}
|
|
1076
|
-
if isinstance(value,
|
|
1103
|
+
if isinstance(value, int | float):
|
|
1077
1104
|
return bool(value)
|
|
1078
1105
|
return False # pragma: no cover - defensive default
|
|
1079
1106
|
|
|
@@ -156,13 +156,13 @@ class OpenAIClient:
|
|
|
156
156
|
keys_preview = sorted(processed_request.keys())
|
|
157
157
|
logger.info(f"Request keys: {keys_preview}")
|
|
158
158
|
|
|
159
|
-
# Final hard-guard for OpenAI: ensure unsupported field is not present
|
|
159
|
+
# Final hard-guard for OpenAI/Groq: ensure unsupported field is not present
|
|
160
160
|
try:
|
|
161
|
-
|
|
161
|
+
low_url = url.lower()
|
|
162
|
+
if ("openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url) and "stop_after_tool_calls" in processed_request:
|
|
162
163
|
processed_request.pop("stop_after_tool_calls", None)
|
|
163
|
-
logger.info("Removed stop_after_tool_calls for OpenAI request")
|
|
164
|
+
logger.info("Removed stop_after_tool_calls for Groq/OpenAI request")
|
|
164
165
|
# Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
|
|
165
|
-
low_url = url.lower()
|
|
166
166
|
if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
|
|
167
167
|
processed_request, dict
|
|
168
168
|
):
|
|
@@ -343,8 +343,6 @@ async def step_policy(
|
|
|
343
343
|
inf_req = meta["inference_request"]
|
|
344
344
|
msgs = inf_req["messages"]
|
|
345
345
|
model_name = inf_req.get("model") or getattr(policy, "model", None) or ""
|
|
346
|
-
system_messages: list[str] = []
|
|
347
|
-
user_messages: list[str] = []
|
|
348
346
|
if msgs and len(msgs) > 0 and msgs[0]["role"] == "system":
|
|
349
347
|
sys_text = msgs[0]["content"]
|
|
350
348
|
policy_name = getattr(policy, "name", "") or type(policy).__name__.lower()
|
|
@@ -888,14 +888,6 @@ async def execute_rollout(
|
|
|
888
888
|
logger.debug(f"TRACER_FACTORY_FAIL: {exc}")
|
|
889
889
|
tracing_context = RolloutTracingContext(tracer_instance, request, req)
|
|
890
890
|
await tracing_context.start_session()
|
|
891
|
-
# Print whether tracing is active for this rollout
|
|
892
|
-
try:
|
|
893
|
-
print(
|
|
894
|
-
f"[rollout] tracing enabled={bool(tracing_context.enabled)} run_id={request.run_id}",
|
|
895
|
-
flush=True,
|
|
896
|
-
)
|
|
897
|
-
except Exception:
|
|
898
|
-
pass
|
|
899
891
|
|
|
900
892
|
# Register run
|
|
901
893
|
registry.register_run(request.run_id)
|