synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff shows the content changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release.
This version of synth-ai might be problematic.
- examples/README.md +1 -0
- examples/analyze_semantic_words.sh +2 -2
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
- examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -2
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +6 -6
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +79 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +6 -6
- examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
- examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
- examples/sft/evaluate.py +2 -4
- examples/sft/export_dataset.py +7 -4
- examples/swe/task_app/README.md +33 -3
- examples/swe/task_app/grpo_swe_mini.py +4 -1
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
- examples/swe/task_app/hosted/inference/openai_client.py +4 -4
- examples/swe/task_app/hosted/policy_routes.py +0 -2
- examples/swe/task_app/hosted/rollout.py +0 -8
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +36 -5
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/README.md +3 -3
- examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
- examples/vlm/crafter_openai_vlm_agent.py +3 -5
- examples/vlm/filter_image_rows.py +1 -1
- examples/vlm/run_crafter_vlm_benchmark.py +2 -2
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +1 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
- examples/warming_up_to_rl/export_trace_sft.py +174 -60
- examples/warming_up_to_rl/readme.md +63 -132
- examples/warming_up_to_rl/run_fft_and_save.py +1 -1
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/run_rl_and_save.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
- synth_ai/__init__.py +44 -30
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +144 -7
- synth_ai/api/train/__init__.py +13 -1
- synth_ai/api/train/builders.py +9 -3
- synth_ai/api/train/cli.py +155 -17
- synth_ai/api/train/config_finder.py +18 -11
- synth_ai/api/train/configs/__init__.py +8 -1
- synth_ai/api/train/configs/rl.py +32 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/api/train/env_resolver.py +13 -10
- synth_ai/auth/credentials.py +119 -0
- synth_ai/cli/__init__.py +61 -69
- synth_ai/cli/_modal_wrapper.py +7 -5
- synth_ai/cli/_typer_patch.py +0 -2
- synth_ai/cli/_validate_task_app.py +22 -4
- synth_ai/cli/commands/__init__.py +17 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/deploy/__init__.py +23 -0
- synth_ai/cli/commands/deploy/core.py +614 -0
- synth_ai/cli/commands/deploy/errors.py +72 -0
- synth_ai/cli/commands/deploy/validation.py +11 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1109 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +388 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +73 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +199 -0
- synth_ai/cli/commands/train/judge_validation.py +304 -0
- synth_ai/cli/commands/train/validation.py +443 -0
- synth_ai/cli/demo.py +2 -162
- synth_ai/cli/deploy/__init__.py +28 -0
- synth_ai/cli/deploy/core.py +5 -0
- synth_ai/cli/deploy/errors.py +23 -0
- synth_ai/cli/deploy/validation.py +5 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +3 -1
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/recent.py +2 -1
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +21 -0
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +110 -1499
- synth_ai/cli/traces.py +1 -1
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +1 -1
- synth_ai/cli/watch.py +1 -1
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +702 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +0 -1
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/evals/base.py +16 -5
- synth_ai/evals/client.py +1 -1
- synth_ai/http.py +8 -22
- synth_ai/inference/client.py +1 -1
- synth_ai/judge_schemas.py +4 -5
- synth_ai/learning/client.py +1 -1
- synth_ai/learning/health.py +1 -1
- synth_ai/learning/jobs.py +1 -1
- synth_ai/learning/rl/client.py +4 -2
- synth_ai/learning/rl/env_keys.py +1 -1
- synth_ai/learning/rl/secrets.py +1 -1
- synth_ai/learning/sft/client.py +1 -1
- synth_ai/learning/sft/data.py +407 -4
- synth_ai/learning/validators.py +4 -1
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +469 -0
- synth_ai/streaming/streamer.py +301 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/apps/__init__.py +4 -2
- synth_ai/task/config.py +6 -4
- synth_ai/task/rubrics/__init__.py +1 -2
- synth_ai/task/rubrics/loaders.py +14 -10
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/trace_correlation_helpers.py +24 -11
- synth_ai/task/tracing_utils.py +14 -3
- synth_ai/task/validators.py +0 -1
- synth_ai/tracing_v3/abstractions.py +3 -3
- synth_ai/tracing_v3/config.py +15 -13
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +3 -1
- synth_ai/tracing_v3/decorators.py +10 -7
- synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/session_tracer.py +7 -7
- synth_ai/tracing_v3/storage/base.py +29 -29
- synth_ai/tracing_v3/storage/config.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +8 -9
- synth_ai/tracing_v3/turso/native_manager.py +80 -72
- synth_ai/tracing_v3/utils.py +2 -2
- synth_ai/utils/__init__.py +101 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/cli.py +131 -0
- synth_ai/utils/env.py +294 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/modal.py +308 -0
- synth_ai/utils/process.py +212 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/v0/config/__init__.py +1 -5
- synth_ai/v0/config/base_url.py +1 -7
- synth_ai/v0/tracing/config.py +1 -1
- synth_ai/v0/tracing/decorators.py +1 -1
- synth_ai/v0/tracing/upload.py +1 -1
- synth_ai/v0/tracing_v1/config.py +1 -1
- synth_ai/v0/tracing_v1/decorators.py +1 -1
- synth_ai/v0/tracing_v1/upload.py +1 -1
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
- synth_ai/cli/man.py +0 -106
- synth_ai/cli/tui.py +0 -57
- synth_ai/compound/cais.py +0 -0
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/handshake.py +0 -109
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -906
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,178 @@
+"""Utility classes for running swe-mini environments on Morph Cloud."""
+
+from __future__ import annotations
+
+import contextlib
+import os
+import shlex
+import time
+from dataclasses import dataclass, field
+from typing import Any, Dict
+
+_IMPORT_ERROR: Exception | None = None
+
+try:  # pragma: no cover - optional dependency
+    from morphcloud.api import MorphCloudClient
+except Exception as exc:  # pragma: no cover - optional dependency
+    MorphCloudClient = None  # type: ignore[assignment]
+    _IMPORT_ERROR = exc
+
+
+def _quote_env_var(key: str, value: str) -> str:
+    """Return a safe shell export statement."""
+    return f"export {key}={shlex.quote(value)}"
+
+
+def _now() -> float:
+    return time.time()
+
+
+@dataclass
+class MorphSandboxBackend:
+    """Thin wrapper around Morph Cloud instances for command execution.
+
+    The API mirrors the subset consumed by :class:`MiniSweEnvironmentWrapper`:
+    we expose an ``execute`` method that matches the mini-swe environment shape.
+    """
+
+    snapshot_id: str | None = None
+    image_id: str | None = None
+    cwd: str = "/workspace"
+    env: Dict[str, str] | None = None
+    metadata: Dict[str, str] | None = None
+    vcpus: int = 4
+    memory_mb: int = 8192
+    disk_mb: int = 65536
+    startup_timeout: int = 600
+
+    _client: MorphCloudClient = field(init=False)
+    _instance: Any = field(init=False, default=None)
+    _last_exec: Dict[str, Any] = field(init=False, default_factory=dict)
+    _started_at: float | None = field(init=False, default=None)
+
+    def __post_init__(self) -> None:
+        if MorphCloudClient is None:  # pragma: no cover - optional dependency
+            raise RuntimeError(
+                "morphcloud package is required for Morph environments. "
+                "Install with `pip install morphcloud`."
+            ) from _IMPORT_ERROR
+
+        api_key = os.getenv("MORPH_API_KEY", "")
+        if not api_key:
+            raise RuntimeError("Set MORPH_API_KEY before using the Morph backend.")
+
+        # Normalise metadata/env early to avoid shared references.
+        self.metadata = {str(k): str(v) for k, v in (self.metadata or {}).items()}
+        self.env = {str(k): str(v) for k, v in (self.env or {}).items()}
+        self.cwd = self.cwd or "/workspace"
+
+        self._client = MorphCloudClient()
+
+    # Public API -----------------------------------------------------------------
+
+    def execute(self, command: str, timeout: int | None = None) -> Dict[str, Any]:
+        """Execute ``command`` inside the Morph instance."""
+        if not command.strip():
+            command = "true"
+
+        instance = self._ensure_instance()
+
+        script_parts = []
+        for key, value in self.env.items():
+            script_parts.append(_quote_env_var(key, value))
+        if self.cwd:
+            script_parts.append(f"cd {shlex.quote(self.cwd)}")
+        script_parts.append(command)
+
+        script = " && ".join(script_parts)
+        if timeout:
+            wrapped = f"timeout {int(timeout)}s bash -lc {shlex.quote(script)}"
+        else:
+            wrapped = script
+
+        shell_cmd = f"bash -lc {shlex.quote(wrapped)}"
+        started = _now()
+        result = instance.exec(shell_cmd)
+        duration = _now() - started
+
+        payload = {
+            "output": (result.stdout or ""),
+            "stderr": (result.stderr or ""),
+            "returncode": getattr(result, "exit_code", None),
+            "duration": duration,
+        }
+        self._last_exec = payload
+        return payload
+
+    def close(self) -> None:
+        """Stops the Morph instance if one is running."""
+        instance = getattr(self, "_instance", None)
+        if not instance:
+            return
+        try:
+            instance.stop()
+        except Exception:  # pragma: no cover - best-effort shutdown
+            pass
+        finally:
+            self._instance = None
+
+    # Internal helpers -----------------------------------------------------------
+
+    def _ensure_instance(self):
+        instance = getattr(self, "_instance", None)
+        if instance is not None:
+            return instance
+
+        snapshot_id = (
+            self.snapshot_id
+            or os.getenv("SWE_MINI_MORPH_SNAPSHOT_ID")
+            or os.getenv("MORPH_SNAPSHOT_ID")
+        )
+        metadata = dict(self.metadata)
+
+        if snapshot_id:
+            instance = self._client.instances.start(snapshot_id=snapshot_id, metadata=metadata or None)
+        else:
+            image_id = (
+                self.image_id
+                or os.getenv("SWE_MINI_MORPH_IMAGE_ID")
+                or os.getenv("MORPH_IMAGE_ID")
+                or "morphvm-minimal"
+            )
+            snapshot = self._client.snapshots.create(
+                image_id=image_id,
+                vcpus=self.vcpus,
+                memory=self.memory_mb,
+                disk_size=self.disk_mb,
+            )
+            instance = self._client.instances.start(snapshot_id=snapshot.id, metadata=metadata or None)
+            self.snapshot_id = snapshot.id
+
+        self._instance = instance
+        self._started_at = _now()
+        self._wait_until_ready(instance)
+        self._ensure_cwd(instance)
+        return instance
+
+    def _wait_until_ready(self, instance) -> None:
+        deadline = _now() + float(self.startup_timeout)
+        while True:
+            try:
+                instance.wait_until_ready()
+                break
+            except Exception as exc:  # pragma: no cover - SDK may raise while polling
+                if _now() > deadline:
+                    raise TimeoutError(f"Morph instance did not become ready within {self.startup_timeout}s") from exc
+                time.sleep(5.0)
+
+    def _ensure_cwd(self, instance) -> None:
+        if not self.cwd:
+            return
+        try:
+            instance.exec(f"bash -lc {shlex.quote(f'mkdir -p {self.cwd}')}")
+        except Exception as exc:  # pragma: no cover - surface friendly error
+            raise RuntimeError(f"Failed to create remote workspace {self.cwd!r}: {exc}") from exc
+
+    def __del__(self) -> None:  # pragma: no cover - defensive cleanup
+        with contextlib.suppress(Exception):
+            self.close()
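For orientation, here is a minimal usage sketch of the new Morph backend. It is not part of the diff: the import path mirrors the file location but may not be importable as a package, the snapshot id is a placeholder, and a valid `MORPH_API_KEY` plus the optional `morphcloud` dependency are assumed.

```python
# Minimal sketch, assuming MORPH_API_KEY is set and `morphcloud` is installed.
# The snapshot id and import path below are illustrative placeholders.
from examples.swe.task_app.morph_backend import MorphSandboxBackend

backend = MorphSandboxBackend(
    snapshot_id="snapshot_xxx",        # hypothetical snapshot id
    cwd="/workspace",
    env={"GIT_TERMINAL_PROMPT": "0"},  # exported before every command
)
try:
    result = backend.execute("python --version", timeout=60)
    print(result["returncode"], result["output"])
finally:
    backend.close()  # best-effort instance shutdown
```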
@@ -6,7 +6,7 @@ underlying FastAPI plumbing.
 
 ## Local development
 ```bash
-uvx synth-ai
+uvx synth-ai deploy --runtime uvicorn grpo-crafter --port 8001
 # Optional extras:
 # --env-file path/to/.env # load additional environment variables
 # --reload # enable uvicorn auto-reload
@@ -7,10 +7,15 @@ import logging
 import os
 import sys
 from collections.abc import Iterable, Sequence
+from contextlib import suppress
 from dataclasses import dataclass
+from datetime import UTC, datetime
 from pathlib import Path
 from typing import Any
 
+from fastapi import HTTPException
+from pydantic import BaseModel
+
 from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
 from synth_ai.task.contracts import RolloutMetrics, RolloutMode, RolloutRequest, RolloutResponse, TaskInfo
 from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
@@ -614,16 +619,14 @@ def _coerce_math_to_crafter(request: RolloutRequest) -> RolloutRequest:
 
     coerced = request.model_copy(update={"env": updated_env, "policy": updated_policy, "ops": ops_override})
 
-    try:
+    with suppress(Exception):
         print(
             "[rollout] remapped math request -> crafter "
             f"(env={request.env.env_name!r}→{coerced.env.env_name!r}, "
             f"policy={request.policy.policy_name!r}→{coerced.policy.policy_name!r})",
             flush=True,
         )
-    except Exception:
-        pass
-    try:
+    with suppress(Exception):
         logger.info(
             "ROLLOUT_ALIAS: remapped math env/policy to crafter (env=%s→%s, policy=%s→%s)",
             request.env.env_name,
@@ -631,8 +634,6 @@ def _coerce_math_to_crafter(request: RolloutRequest) -> RolloutRequest:
             request.policy.policy_name,
             coerced.policy.policy_name,
         )
-    except Exception:
-        pass
 
     return coerced
 
@@ -654,12 +655,20 @@ def _resolve_trace_correlation_id(policy_cfg: dict[str, Any], mode: Any = None)
         if stripped:
             return stripped
 
-    return extract_trace_correlation_id(policy_cfg.get("inference_url")
+    return extract_trace_correlation_id(policy_cfg.get("inference_url"))
 
 
 async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
     request = _coerce_math_to_crafter(request)
 
+    record_cfg = request.record.model_copy(
+        update={
+            "return_trace": True,
+            "trace_format": "structured",
+        }
+    )
+    request = request.model_copy(update={"record": record_cfg})
+
     policy_cfg = dict(request.policy.config or {})
     logger.info(
         "ROLLOUT_EXEC: incoming policy config keys=%s inference_url=%s run_id=%s mode=%s",
@@ -803,11 +812,38 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
         trace_correlation_id,
     )
     data = legacy_response.model_dump()
+    logger.debug(
+        "ROLLOUT_EXEC: legacy response keys=%s has_trace=%s",
+        sorted(data.keys()),
+        bool(data.get("trace")),
+    )
     metrics = data.get("metrics", {}) or {}
     metrics.setdefault("outcome_score", None)
     metrics.setdefault("events_score", None)
     metrics.setdefault("details", {})
     data["metrics"] = metrics
+
+    if data.get("trace") is None:
+        legacy_trace = getattr(legacy_response, "trace", None)
+        if legacy_trace is not None:
+            data["trace"] = legacy_trace
+        else:
+            tracer_factory = getattr(fastapi_request.app.state, "session_tracer_factory", None)
+            if callable(tracer_factory):
+                tracer = tracer_factory()
+                logger.debug("ROLLOUT_EXEC: trace backfill factory=%s", type(tracer))
+                if isinstance(tracer, SessionTracer):
+                    try:
+                        await tracer.initialize()
+                        if tracer.db is not None:
+                            trace_row = await tracer.db.get_session_trace(request.run_id)
+                            if trace_row is not None:
+                                data["trace"] = trace_row
+                    except Exception as exc:
+                        logger.warning("TRACE_BACKFILL_FAIL: %s", exc)
+                    finally:
+                        with suppress(Exception):
+                            await tracer.close()
 
     # Add trace_correlation_id at TOP-LEVEL (REQUIRED for RL training pipeline)
     # Use fallback if somehow missing
@@ -823,12 +859,30 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
     if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
         existing_meta.setdefault("inference_url", policy_cfg["inference_url"])
     data["pipeline_metadata"] = existing_meta
-
+
     # Add trace_correlation_id to each trajectory (required for RL training pipeline)
     if "trajectories" in data:
+        normalized_trajs: list[dict[str, Any]] = []
         for traj in data.get("trajectories", []):
-            if isinstance(traj,
-
+            if isinstance(traj, BaseModel):
+                traj_dict = traj.model_dump()
+            elif isinstance(traj, dict):
+                traj_dict = dict(traj)
+            else:
+                continue
+            traj_dict["trace_correlation_id"] = final_cid
+            if not traj_dict.get("inference_url"):
+                inferred_url = policy_cfg.get("inference_url")
+                if inferred_url:
+                    traj_dict["inference_url"] = inferred_url
+            normalized_trajs.append(traj_dict)
+        if normalized_trajs:
+            data["trajectories"] = normalized_trajs
+        logger.info(
+            "ROLLOUT_EXEC: normalized trajectory sample run_id=%s inference_url=%s",
+            request.run_id,
+            normalized_trajs[0].get("inference_url") if normalized_trajs else None,
+        )
     logger.info(
         "ROLLOUT_EXEC: final pipeline metadata run_id=%s metadata=%s",
         request.run_id,
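For context, after this normalization each trajectory entry is a plain dict carrying the correlation fields the RL training pipeline expects. A hedged sketch of the resulting shape, with placeholder values only:

```python
# Illustrative shape only; values are placeholders, and real trajectories
# carry additional rollout fields (steps, rewards, etc.) passed through unchanged.
normalized_trajectory = {
    "trace_correlation_id": "trace_corr_abc123",          # hypothetical id
    "inference_url": "https://inference.example.com/v1",  # backfilled from policy config if absent
}
```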
@@ -847,6 +901,12 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
         request.run_id,
         existing_meta,
     )
+
+    if data.get("trace") is None:
+        raise HTTPException(
+            status_code=500,
+            detail="trace_payload_missing: task app did not emit a SessionTrace",
+        )
 
     # ASSERTION: Verify trace_correlation_id is present in response at all required levels
     assert "trace_correlation_id" in data, (
@@ -3,7 +3,7 @@
 This module now delegates to the TaskAppConfig defined in the colocated example at
 `examples/task_apps/crafter/task_app/grpo_crafter.py`. It is kept for legacy usage
 (running the file directly or targeting `fastapi_app` from external tooling). Prefer using
-`uvx synth-ai
+`uvx synth-ai deploy --runtime uvicorn grpo-crafter` for local development and testing.
 """
 
 from __future__ import annotations
@@ -59,6 +59,13 @@ class CrafterPolicy(Policy):
         self.trajectory_history: list[dict[str, Any]] = []  # env/policy step records
 
     async def initialize(self, config: dict[str, Any]) -> None:
+        # DEBUG: Log the incoming config
+        import logging
+        _logger = logging.getLogger(__name__)
+        _logger.debug(f"🔊 [POLICY_INIT] Received config keys: {list(config.keys())}")
+        _logger.debug(f"🔊 [POLICY_INIT] use_vision in config: {'use_vision' in config}, value: {config.get('use_vision')}")
+        _logger.debug(f"🔊 [POLICY_INIT] image_only_mode in config: {'image_only_mode' in config}, value: {config.get('image_only_mode')}")
+
         if "inference_url" in config:
             self.inference_url = config["inference_url"]
         if "model" in config:
@@ -67,6 +74,7 @@ class CrafterPolicy(Policy):
             self.use_tools = bool(config["use_tools"])
         if "use_vision" in config:
             self.use_vision = bool(config["use_vision"])
+            _logger.debug(f"🔊 [POLICY_INIT] Set use_vision={self.use_vision} from config")
         if "image_only_mode" in config:
             self.image_only_mode = bool(config["image_only_mode"])
             # If image_only_mode is enabled, automatically enable vision
@@ -97,6 +105,9 @@ class CrafterPolicy(Policy):
         self.history_messages = []
         self.turn_index = 0
         self.trajectory_history = []
+
+        # DEBUG: Log final state
+        _logger.debug(f"🔊 [POLICY_INIT] FINAL STATE: use_vision={self.use_vision}, image_only_mode={self.image_only_mode}, model={self.model}")
 
     def _append_user_observation(self, observation_text: str) -> None:
         self.history_messages.append({"role": "user", "content": observation_text})
@@ -131,10 +142,36 @@ class CrafterPolicy(Policy):
             history=history,
             turn=turn,
             image_parts=image_parts,
+            image_only_mode=self.image_only_mode,
         )
+
+        # DEBUG: Log message structure
+        import logging
+        _logger = logging.getLogger(__name__)
+        _logger.debug(f"🔊 [BUILD_REQUEST] Built {len(messages)} messages")
+        for idx, msg in enumerate(messages):
+            role = msg.get("role")
+            content = msg.get("content")
+            if isinstance(content, list):
+                _logger.debug(f"🔊 [BUILD_REQUEST] Message[{idx}] role={role}, content=list[{len(content)}]")
+                for part_idx, part in enumerate(content):
+                    if isinstance(part, dict):
+                        part_type = part.get("type")
+                        _logger.debug(f"🔊 [BUILD_REQUEST] Part[{part_idx}]: type={part_type}")
+            else:
+                content_len = len(str(content)) if content else 0
+                _logger.debug(f"🔊 [BUILD_REQUEST] Message[{idx}] role={role}, content_len={content_len}")
+
         payload: dict[str, Any] = {
             "messages": messages,
         }
+
+        # DEBUG: Verify messages are in payload correctly
+        _logger.debug(f"🔊 [BUILD_REQUEST_PAYLOAD] Created payload with {len(payload['messages'])} messages")
+        for idx, msg in enumerate(payload["messages"]):
+            content = msg.get("content")
+            _logger.debug(f"🔊 [BUILD_REQUEST_PAYLOAD] Payload message[{idx}]: type={type(content).__name__}, is_list={isinstance(content, list)}, len={len(content) if isinstance(content, list) else len(str(content)) if content else 0}")
+
         if self.model is not None:
             payload["model"] = self.model
         # Thinking controls
@@ -160,6 +197,8 @@ class CrafterPolicy(Policy):
         if self.use_tools:
             payload["tools"] = TOOLS_SCHEMA
             payload["tool_choice"] = "required"
+            payload["function_call"] = {"name": "interact_many"}
+            payload["parallel_tool_calls"] = False
             # Ensure the inference server injects family-specific stop sequences
             # to terminate immediately after the first tool call for compliance.
             payload["stop_after_tool_calls"] = 1
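Roughly, with tools enabled the policy's inference request now carries all of these enforcement fields together. The sketch below is illustrative only: the "tools" entry stands in for the repo's TOOLS_SCHEMA, and the comments are interpretation rather than documented behavior.

```python
# Illustrative sketch of the tool-enforcement fields set above; the "tools"
# entry is a placeholder, not the real TOOLS_SCHEMA from the repo.
payload = {
    "messages": [{"role": "user", "content": "..."}],
    "tools": [{"type": "function", "function": {"name": "interact_many"}}],  # placeholder schema
    "tool_choice": "required",
    "function_call": {"name": "interact_many"},  # presumably a legacy-style hint for older servers
    "parallel_tool_calls": False,                # one tool call per turn
    "stop_after_tool_calls": 1,                  # stripped later for OpenAI/Groq targets (see the client hunk below)
}
```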
@@ -170,13 +209,7 @@ class CrafterPolicy(Policy):
         response: dict[str, Any],
         use_tools: bool = True,
     ) -> list[dict[str, Any]]:
-        """Turn an inference response into environment tool calls.
-
-        - If tools were used, expect tool_calls-compatible output and forward as-is
-          in our simple JSON format: {"tool_name": str, "arguments": {...}}.
-        - If no tools, parse plain-text actions using CrafterReActAgent parser and
-          wrap them into a single interact_many tool call.
-        """
+        """Turn an inference response into environment tool calls."""
         # First check if we got actual tool calls
         choices = response.get("choices", [])
         tool_calls: list[dict[str, Any]] = []
@@ -235,24 +268,6 @@ class CrafterPolicy(Policy):
                 normalized.append(tc)
             return normalized
 
-        # Otherwise, parse plain text content for actions
-        text = ""
-        for choice in choices:
-            msg = choice.get("message", {})
-            content = msg.get("content", "")
-            if content:
-                text = content
-                break
-
-        if text:
-            # Try to parse actions from the text
-            from .shared import parse_actions
-
-            actions = parse_actions(text)
-            if actions:
-                # Wrap actions in interact_many tool call
-                return [{"tool_name": "interact_many", "arguments": {"actions": actions}}]
-
         # No actions found
         return []
 
@@ -360,7 +375,18 @@ class CrafterPolicy(Policy):
             raw_candidate = metadata.get("raw_observation")
             if isinstance(raw_candidate, dict):
                 raw_observation = raw_candidate
+
+        # DEBUG: Log image extraction
+        import logging
+        _logger = logging.getLogger(__name__)
+        _logger.debug(f"🔊 [POLICY] use_vision={self.use_vision}, has_raw_obs={raw_observation is not None}")
+        if raw_observation:
+            obs = raw_observation.get("observation", raw_observation)
+            data_url = obs.get("observation_image_data_url") if isinstance(obs, dict) else None
+            _logger.debug(f"🔊 [POLICY] has_data_url={data_url is not None}, url_preview={data_url[:50] if data_url else 'NONE'}...")
+
         image_parts = self._extract_image_parts(raw_observation)
+        _logger.debug(f"🔊 [POLICY] Extracted {len(image_parts)} image parts")
 
         payload = self.build_inference_request(
             combined_text,
@@ -368,7 +394,17 @@ class CrafterPolicy(Policy):
             turn=self.turn_index,
             image_parts=image_parts,
         )
-
+
+        # DEBUG: Verify payload before returning
+        _logger.debug(f"🔊 [POLICY_STEP_RETURN] About to return payload with {len(payload.get('messages', []))} messages")
+        for idx, msg in enumerate(payload.get("messages", [])):
+            content = msg.get("content")
+            _logger.debug(f"🔊 [POLICY_STEP_RETURN] Return message[{idx}]: type={type(content).__name__}, is_list={isinstance(content, list)}")
+            if isinstance(content, list):
+                _logger.debug(f"🔊 [POLICY_STEP_RETURN] Content list has {len(content)} items")
+                # Add assertion to catch corruption early
+                assert len(content) > 0, f"Message content list is empty! This should contain images."
+
         meta_out = {
             "inference_url": self.inference_url,
             "inference_request": payload,
@@ -484,7 +520,7 @@ class CrafterPolicy(Policy):
             "claude-3",  # All Claude 3 models support vision
             "gemini",  # Gemini models
             "qwen-vl",  # Qwen Vision-Language models
-            "
+            "qwen3-vl",  # Qwen3 VL
             "pixtral",  # Mistral's vision model
             "llava",  # LLaVA models
             "phi-3-vision",  # Microsoft Phi-3 Vision
@@ -45,8 +45,7 @@ class CrafterReActAgent:
             "Action policy:\n"
             "- Always return a single tool call: interact_many({actions: [...]})\n"
             "- Use 2–5 actions per call; prefer long movement sequences to explore.\n"
-            "- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n"
-            "- Do not spam the same exact sequence twice in a row—explore in varied directions.\n\n"
+            "- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n\n"
             "Available actions: noop, move_up, move_down, move_left, move_right, do (interact), sleep, "
             "place_stone, place_table, place_furnace, place_plant, make_wood_pickaxe, make_stone_pickaxe, "
             "make_iron_pickaxe, make_wood_sword, make_stone_sword, make_iron_sword\n"
@@ -50,20 +50,19 @@ class OpenAIClient:
         # Make a copy to avoid modifying the original
         fixed_request = request.copy()
 
-        # Determine if target is OpenAI-compatible (OpenAI, Azure OpenAI
-        #
+        # Determine if target is OpenAI-compatible (OpenAI, Azure OpenAI).
+        # Groq shares the API surface but we keep tool enforcement fields intact.
         is_openai = False
+        is_groq = False
         try:
             if isinstance(target_url, str):
                 low = target_url.lower()
-
-
-
-
-
-
-                    or ("/proxy/openai" in low)
-                )
+                if "groq.com" in low or "/proxy/groq" in low:
+                    is_groq = True
+                elif ("openai.com" in low) or ("azure" in low and ".openai." in low) or (
+                    "/proxy/openai" in low
+                ):
+                    is_openai = True
         except Exception:
             is_openai = False
 
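To make the new routing concrete, a standalone sketch of the same classification predicate follows. The endpoint URLs are made up for illustration and are not taken from the package.

```python
# Sketch of the URL classification introduced above, with made-up endpoint URLs.
def classify(target_url: str) -> str:
    low = target_url.lower()
    if "groq.com" in low or "/proxy/groq" in low:
        return "groq"
    if ("openai.com" in low) or ("azure" in low and ".openai." in low) or ("/proxy/openai" in low):
        return "openai"
    return "other"

print(classify("https://api.groq.com/openai/v1/chat/completions"))   # groq
print(classify("https://api.openai.com/v1/chat/completions"))        # openai
print(classify("https://my-vllm.example.com/v1/chat/completions"))   # other
```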
@@ -218,8 +217,20 @@ class OpenAIClient:
             # Do NOT fall back silently; surface the error so callers fail fast
             raise
 
+        # DEBUG: Log request BEFORE _fix_model_parameters
+        logger.debug(f"🔊 [OPENAI_CLIENT_PRE_FIX] Request message[1] content type: {type(request.get('messages', [])[1].get('content') if len(request.get('messages', [])) > 1 else None)}")
+        if len(request.get("messages", [])) > 1:
+            msg1_content = request["messages"][1].get("content")
+            logger.debug(f"🔊 [OPENAI_CLIENT_PRE_FIX] Message[1] content value: {msg1_content if not isinstance(msg1_content, list) else f'list[{len(msg1_content)}]'}")
+
         # Fix parameter compatibility for newer models
         processed_request = self._fix_model_parameters(request, target_url=url)
+
+        # DEBUG: Log request AFTER _fix_model_parameters
+        logger.debug(f"🔊 [OPENAI_CLIENT_POST_FIX] Processed message[1] content type: {type(processed_request.get('messages', [])[1].get('content') if len(processed_request.get('messages', [])) > 1 else None)}")
+        if len(processed_request.get("messages", [])) > 1:
+            msg1_content_post = processed_request["messages"][1].get("content")
+            logger.debug(f"🔊 [OPENAI_CLIENT_POST_FIX] Message[1] content value: {msg1_content_post if not isinstance(msg1_content_post, list) else f'list[{len(msg1_content_post)}]'}")
 
         # Log request (redact messages in production)
         logger.info(f"Inference POST target: {url}")
@@ -228,14 +239,32 @@ class OpenAIClient:
         with contextlib.suppress(Exception):
             keys_preview = sorted(processed_request.keys())
             logger.info(f"Request keys: {keys_preview}")
-
-
+            # DEBUG: Log message structure for vision debugging
+            if "messages" in processed_request:
+                msgs = processed_request["messages"]
+                if isinstance(msgs, list):
+                    logger.debug(f"🔊 [OPENAI_CLIENT] Request has {len(msgs)} messages")
+                    for idx, msg in enumerate(msgs):
+                        if isinstance(msg, dict):
+                            role = msg.get("role")
+                            content = msg.get("content")
+                            if isinstance(content, list):
+                                logger.debug(f"🔊 [OPENAI_CLIENT] Message[{idx}] role={role}, content=list[{len(content)}]")
+                                for part_idx, part in enumerate(content):
+                                    if isinstance(part, dict):
+                                        part_type = part.get("type")
+                                        logger.debug(f"🔊 [OPENAI_CLIENT] Part[{part_idx}]: type={part_type}")
+                            else:
+                                content_len = len(str(content)) if content else 0
+                                logger.debug(f"🔊 [OPENAI_CLIENT] Message[{idx}] role={role}, content_type={type(content).__name__}, len={content_len}")
+
+        # Final hard-guard for OpenAI/Groq: drop unsupported field
         try:
-
+            low_url = url.lower()
+            if ("openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url) and "stop_after_tool_calls" in processed_request:
                 processed_request.pop("stop_after_tool_calls", None)
-                logger.info("Removed stop_after_tool_calls for
+                logger.info("Removed stop_after_tool_calls for %s request", "Groq/OpenAI")
             # Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
-            low_url = url.lower()
             if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
                 processed_request, dict
             ):
@@ -516,47 +545,16 @@ class OpenAIClient:
                         error_block.get("code") or error_block.get("type") or ""
                     ).lower()
                     if error_code in {"tool_use_failed", "tool_call_failed"}:
-                        logger.
+                        logger.error(
                             {
                                 "tool_use_failed": True,
                                 "target": (base_url or self.base_url),
                                 "message": error_block.get("message") if isinstance(error_block, dict) else None,
                             }
                         )
-
-
-
-                            "object": "chat.completion",
-                            "created": int(time.time()),
-                            "model": processed_request.get("model"),
-                            "choices": [
-                                {
-                                    "index": 0,
-                                    "message": {
-                                        "role": "assistant",
-                                        "content": "",
-                                        "tool_calls": [
-                                            {
-                                                "id": f"call_fallback_{int(time.time() * 1000)}",
-                                                "type": "function",
-                                                "function": {
-                                                    "name": "interact_many",
-                                                    "arguments": json.dumps(
-                                                        {"actions": fallback_actions}
-                                                    ),
-                                                },
-                                            }
-                                        ],
-                                    },
-                                    "finish_reason": "tool_calls",
-                                }
-                            ],
-                        }
-                        if isinstance(response_data.get("usage"), dict):
-                            fallback_response["usage"] = response_data["usage"]
-                        if isinstance(error_block, dict):
-                            fallback_response["error"] = error_block
-                        return fallback_response
+                        raise RuntimeError(
+                            f"Inference 400 response (tool call failed): {error_block.get('message') if isinstance(error_block, dict) else 'Tool call failed'}"
+                        ) from e
                     # This is a different type of 400 error, don't retry
                     try:
                         redacted_headers = {}