synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff shows the content changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
Potentially problematic release.
This version of synth-ai might be problematic.
- examples/README.md +1 -0
- examples/analyze_semantic_words.sh +2 -2
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
- examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -2
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +6 -6
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +79 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +6 -6
- examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
- examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
- examples/sft/evaluate.py +2 -4
- examples/sft/export_dataset.py +7 -4
- examples/swe/task_app/README.md +33 -3
- examples/swe/task_app/grpo_swe_mini.py +4 -1
- examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
- examples/swe/task_app/hosted/inference/openai_client.py +4 -4
- examples/swe/task_app/hosted/policy_routes.py +0 -2
- examples/swe/task_app/hosted/rollout.py +0 -8
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +36 -5
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/README.md +3 -3
- examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
- examples/vlm/crafter_openai_vlm_agent.py +3 -5
- examples/vlm/filter_image_rows.py +1 -1
- examples/vlm/run_crafter_vlm_benchmark.py +2 -2
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +1 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
- examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
- examples/warming_up_to_rl/export_trace_sft.py +174 -60
- examples/warming_up_to_rl/readme.md +63 -132
- examples/warming_up_to_rl/run_fft_and_save.py +1 -1
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/run_rl_and_save.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
- synth_ai/__init__.py +44 -30
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +144 -7
- synth_ai/api/train/__init__.py +13 -1
- synth_ai/api/train/builders.py +9 -3
- synth_ai/api/train/cli.py +155 -17
- synth_ai/api/train/config_finder.py +18 -11
- synth_ai/api/train/configs/__init__.py +8 -1
- synth_ai/api/train/configs/rl.py +32 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/api/train/env_resolver.py +13 -10
- synth_ai/auth/credentials.py +119 -0
- synth_ai/cli/__init__.py +61 -69
- synth_ai/cli/_modal_wrapper.py +7 -5
- synth_ai/cli/_typer_patch.py +0 -2
- synth_ai/cli/_validate_task_app.py +22 -4
- synth_ai/cli/commands/__init__.py +17 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/deploy/__init__.py +23 -0
- synth_ai/cli/commands/deploy/core.py +614 -0
- synth_ai/cli/commands/deploy/errors.py +72 -0
- synth_ai/cli/commands/deploy/validation.py +11 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1109 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +388 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +73 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +199 -0
- synth_ai/cli/commands/train/judge_validation.py +304 -0
- synth_ai/cli/commands/train/validation.py +443 -0
- synth_ai/cli/demo.py +2 -162
- synth_ai/cli/deploy/__init__.py +28 -0
- synth_ai/cli/deploy/core.py +5 -0
- synth_ai/cli/deploy/errors.py +23 -0
- synth_ai/cli/deploy/validation.py +5 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +3 -1
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/recent.py +2 -1
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +21 -0
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +110 -1499
- synth_ai/cli/traces.py +1 -1
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +1 -1
- synth_ai/cli/watch.py +1 -1
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +702 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +0 -1
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/evals/base.py +16 -5
- synth_ai/evals/client.py +1 -1
- synth_ai/http.py +8 -22
- synth_ai/inference/client.py +1 -1
- synth_ai/judge_schemas.py +4 -5
- synth_ai/learning/client.py +1 -1
- synth_ai/learning/health.py +1 -1
- synth_ai/learning/jobs.py +1 -1
- synth_ai/learning/rl/client.py +4 -2
- synth_ai/learning/rl/env_keys.py +1 -1
- synth_ai/learning/rl/secrets.py +1 -1
- synth_ai/learning/sft/client.py +1 -1
- synth_ai/learning/sft/data.py +407 -4
- synth_ai/learning/validators.py +4 -1
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +469 -0
- synth_ai/streaming/streamer.py +301 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/apps/__init__.py +4 -2
- synth_ai/task/config.py +6 -4
- synth_ai/task/rubrics/__init__.py +1 -2
- synth_ai/task/rubrics/loaders.py +14 -10
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/trace_correlation_helpers.py +24 -11
- synth_ai/task/tracing_utils.py +14 -3
- synth_ai/task/validators.py +0 -1
- synth_ai/tracing_v3/abstractions.py +3 -3
- synth_ai/tracing_v3/config.py +15 -13
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +3 -1
- synth_ai/tracing_v3/decorators.py +10 -7
- synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/session_tracer.py +7 -7
- synth_ai/tracing_v3/storage/base.py +29 -29
- synth_ai/tracing_v3/storage/config.py +3 -3
- synth_ai/tracing_v3/turso/daemon.py +8 -9
- synth_ai/tracing_v3/turso/native_manager.py +80 -72
- synth_ai/tracing_v3/utils.py +2 -2
- synth_ai/utils/__init__.py +101 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/cli.py +131 -0
- synth_ai/utils/env.py +294 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/modal.py +308 -0
- synth_ai/utils/process.py +212 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/v0/config/__init__.py +1 -5
- synth_ai/v0/config/base_url.py +1 -7
- synth_ai/v0/tracing/config.py +1 -1
- synth_ai/v0/tracing/decorators.py +1 -1
- synth_ai/v0/tracing/upload.py +1 -1
- synth_ai/v0/tracing_v1/config.py +1 -1
- synth_ai/v0/tracing_v1/decorators.py +1 -1
- synth_ai/v0/tracing_v1/upload.py +1 -1
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
- synth_ai/cli/man.py +0 -106
- synth_ai/cli/tui.py +0 -57
- synth_ai/compound/cais.py +0 -0
- synth_ai/core/experiment.py +0 -13
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -295
- synth_ai/handshake.py +0 -109
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -906
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,178 @@
+"""Utility classes for running swe-mini environments on Morph Cloud."""
+
+from __future__ import annotations
+
+import contextlib
+import os
+import shlex
+import time
+from dataclasses import dataclass, field
+from typing import Any, Dict
+
+_IMPORT_ERROR: Exception | None = None
+
+try:  # pragma: no cover - optional dependency
+    from morphcloud.api import MorphCloudClient
+except Exception as exc:  # pragma: no cover - optional dependency
+    MorphCloudClient = None  # type: ignore[assignment]
+    _IMPORT_ERROR = exc
+
+
+def _quote_env_var(key: str, value: str) -> str:
+    """Return a safe shell export statement."""
+    return f"export {key}={shlex.quote(value)}"
+
+
+def _now() -> float:
+    return time.time()
+
+
+@dataclass
+class MorphSandboxBackend:
+    """Thin wrapper around Morph Cloud instances for command execution.
+
+    The API mirrors the subset consumed by :class:`MiniSweEnvironmentWrapper`:
+    we expose an ``execute`` method that matches the mini-swe environment shape.
+    """
+
+    snapshot_id: str | None = None
+    image_id: str | None = None
+    cwd: str = "/workspace"
+    env: Dict[str, str] | None = None
+    metadata: Dict[str, str] | None = None
+    vcpus: int = 4
+    memory_mb: int = 8192
+    disk_mb: int = 65536
+    startup_timeout: int = 600
+
+    _client: MorphCloudClient = field(init=False)
+    _instance: Any = field(init=False, default=None)
+    _last_exec: Dict[str, Any] = field(init=False, default_factory=dict)
+    _started_at: float | None = field(init=False, default=None)
+
+    def __post_init__(self) -> None:
+        if MorphCloudClient is None:  # pragma: no cover - optional dependency
+            raise RuntimeError(
+                "morphcloud package is required for Morph environments. "
+                "Install with `pip install morphcloud`."
+            ) from _IMPORT_ERROR
+
+        api_key = os.getenv("MORPH_API_KEY", "")
+        if not api_key:
+            raise RuntimeError("Set MORPH_API_KEY before using the Morph backend.")
+
+        # Normalise metadata/env early to avoid shared references.
+        self.metadata = {str(k): str(v) for k, v in (self.metadata or {}).items()}
+        self.env = {str(k): str(v) for k, v in (self.env or {}).items()}
+        self.cwd = self.cwd or "/workspace"
+
+        self._client = MorphCloudClient()
+
+    # Public API -----------------------------------------------------------------
+
+    def execute(self, command: str, timeout: int | None = None) -> Dict[str, Any]:
+        """Execute ``command`` inside the Morph instance."""
+        if not command.strip():
+            command = "true"
+
+        instance = self._ensure_instance()
+
+        script_parts = []
+        for key, value in self.env.items():
+            script_parts.append(_quote_env_var(key, value))
+        if self.cwd:
+            script_parts.append(f"cd {shlex.quote(self.cwd)}")
+        script_parts.append(command)
+
+        script = " && ".join(script_parts)
+        if timeout:
+            wrapped = f"timeout {int(timeout)}s bash -lc {shlex.quote(script)}"
+        else:
+            wrapped = script
+
+        shell_cmd = f"bash -lc {shlex.quote(wrapped)}"
+        started = _now()
+        result = instance.exec(shell_cmd)
+        duration = _now() - started
+
+        payload = {
+            "output": (result.stdout or ""),
+            "stderr": (result.stderr or ""),
+            "returncode": getattr(result, "exit_code", None),
+            "duration": duration,
+        }
+        self._last_exec = payload
+        return payload
+
+    def close(self) -> None:
+        """Stops the Morph instance if one is running."""
+        instance = getattr(self, "_instance", None)
+        if not instance:
+            return
+        try:
+            instance.stop()
+        except Exception:  # pragma: no cover - best-effort shutdown
+            pass
+        finally:
+            self._instance = None
+
+    # Internal helpers -----------------------------------------------------------
+
+    def _ensure_instance(self):
+        instance = getattr(self, "_instance", None)
+        if instance is not None:
+            return instance
+
+        snapshot_id = (
+            self.snapshot_id
+            or os.getenv("SWE_MINI_MORPH_SNAPSHOT_ID")
+            or os.getenv("MORPH_SNAPSHOT_ID")
+        )
+        metadata = dict(self.metadata)
+
+        if snapshot_id:
+            instance = self._client.instances.start(snapshot_id=snapshot_id, metadata=metadata or None)
+        else:
+            image_id = (
+                self.image_id
+                or os.getenv("SWE_MINI_MORPH_IMAGE_ID")
+                or os.getenv("MORPH_IMAGE_ID")
+                or "morphvm-minimal"
+            )
+            snapshot = self._client.snapshots.create(
+                image_id=image_id,
+                vcpus=self.vcpus,
+                memory=self.memory_mb,
+                disk_size=self.disk_mb,
+            )
+            instance = self._client.instances.start(snapshot_id=snapshot.id, metadata=metadata or None)
+            self.snapshot_id = snapshot.id
+
+        self._instance = instance
+        self._started_at = _now()
+        self._wait_until_ready(instance)
+        self._ensure_cwd(instance)
+        return instance
+
+    def _wait_until_ready(self, instance) -> None:
+        deadline = _now() + float(self.startup_timeout)
+        while True:
+            try:
+                instance.wait_until_ready()
+                break
+            except Exception as exc:  # pragma: no cover - SDK may raise while polling
+                if _now() > deadline:
+                    raise TimeoutError(f"Morph instance did not become ready within {self.startup_timeout}s") from exc
+                time.sleep(5.0)
+
+    def _ensure_cwd(self, instance) -> None:
+        if not self.cwd:
+            return
+        try:
+            instance.exec(f"bash -lc {shlex.quote(f'mkdir -p {self.cwd}')}")
+        except Exception as exc:  # pragma: no cover - surface friendly error
+            raise RuntimeError(f"Failed to create remote workspace {self.cwd!r}: {exc}") from exc
+
+    def __del__(self) -> None:  # pragma: no cover - defensive cleanup
+        with contextlib.suppress(Exception):
+            self.close()
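For orientation, here is a minimal usage sketch of the new Morph backend. It is not part of the diff: the import path mirrors the file location but may not be importable as a package, the snapshot id is a placeholder, and a valid `MORPH_API_KEY` plus the optional `morphcloud` dependency are assumed.

```python
# Minimal sketch, assuming MORPH_API_KEY is set and `morphcloud` is installed.
# The snapshot id and import path below are illustrative placeholders.
from examples.swe.task_app.morph_backend import MorphSandboxBackend

backend = MorphSandboxBackend(
    snapshot_id="snapshot_xxx",        # hypothetical snapshot id
    cwd="/workspace",
    env={"GIT_TERMINAL_PROMPT": "0"},  # exported before every command
)
try:
    result = backend.execute("python --version", timeout=60)
    print(result["returncode"], result["output"])
finally:
    backend.close()  # best-effort instance shutdown
```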
@@ -6,7 +6,7 @@ underlying FastAPI plumbing.
 
 ## Local development
 ```bash
-uvx synth-ai
+uvx synth-ai deploy --runtime uvicorn grpo-crafter --port 8001
 # Optional extras:
 # --env-file path/to/.env # load additional environment variables
 # --reload # enable uvicorn auto-reload
@@ -7,10 +7,15 @@ import logging
 import os
 import sys
 from collections.abc import Iterable, Sequence
+from contextlib import suppress
 from dataclasses import dataclass
+from datetime import UTC, datetime
 from pathlib import Path
 from typing import Any
 
+from fastapi import HTTPException
+from pydantic import BaseModel
+
 from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
 from synth_ai.task.contracts import RolloutMetrics, RolloutMode, RolloutRequest, RolloutResponse, TaskInfo
 from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
@@ -614,16 +619,14 @@ def _coerce_math_to_crafter(request: RolloutRequest) -> RolloutRequest:
 
     coerced = request.model_copy(update={"env": updated_env, "policy": updated_policy, "ops": ops_override})
 
-    try:
+    with suppress(Exception):
         print(
             "[rollout] remapped math request -> crafter "
             f"(env={request.env.env_name!r}→{coerced.env.env_name!r}, "
             f"policy={request.policy.policy_name!r}→{coerced.policy.policy_name!r})",
             flush=True,
         )
-    except Exception:
-        pass
-    try:
+    with suppress(Exception):
         logger.info(
             "ROLLOUT_ALIAS: remapped math env/policy to crafter (env=%s→%s, policy=%s→%s)",
             request.env.env_name,
@@ -631,8 +634,6 @@ def _coerce_math_to_crafter(request: RolloutRequest) -> RolloutRequest:
             request.policy.policy_name,
             coerced.policy.policy_name,
         )
-    except Exception:
-        pass
 
     return coerced
 
@@ -654,12 +655,20 @@ def _resolve_trace_correlation_id(policy_cfg: dict[str, Any], mode: Any = None)
         if stripped:
             return stripped
 
-    return extract_trace_correlation_id(policy_cfg.get("inference_url")
+    return extract_trace_correlation_id(policy_cfg.get("inference_url"))
 
 
 async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
     request = _coerce_math_to_crafter(request)
 
+    record_cfg = request.record.model_copy(
+        update={
+            "return_trace": True,
+            "trace_format": "structured",
+        }
+    )
+    request = request.model_copy(update={"record": record_cfg})
+
     policy_cfg = dict(request.policy.config or {})
     logger.info(
         "ROLLOUT_EXEC: incoming policy config keys=%s inference_url=%s run_id=%s mode=%s",
@@ -803,11 +812,38 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
         trace_correlation_id,
     )
     data = legacy_response.model_dump()
+    logger.debug(
+        "ROLLOUT_EXEC: legacy response keys=%s has_trace=%s",
+        sorted(data.keys()),
+        bool(data.get("trace")),
+    )
     metrics = data.get("metrics", {}) or {}
     metrics.setdefault("outcome_score", None)
     metrics.setdefault("events_score", None)
     metrics.setdefault("details", {})
     data["metrics"] = metrics
+
+    if data.get("trace") is None:
+        legacy_trace = getattr(legacy_response, "trace", None)
+        if legacy_trace is not None:
+            data["trace"] = legacy_trace
+        else:
+            tracer_factory = getattr(fastapi_request.app.state, "session_tracer_factory", None)
+            if callable(tracer_factory):
+                tracer = tracer_factory()
+                logger.debug("ROLLOUT_EXEC: trace backfill factory=%s", type(tracer))
+                if isinstance(tracer, SessionTracer):
+                    try:
+                        await tracer.initialize()
+                        if tracer.db is not None:
+                            trace_row = await tracer.db.get_session_trace(request.run_id)
+                            if trace_row is not None:
+                                data["trace"] = trace_row
+                    except Exception as exc:
+                        logger.warning("TRACE_BACKFILL_FAIL: %s", exc)
+                    finally:
+                        with suppress(Exception):
+                            await tracer.close()
 
     # Add trace_correlation_id at TOP-LEVEL (REQUIRED for RL training pipeline)
     # Use fallback if somehow missing
@@ -823,12 +859,30 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
     if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
         existing_meta.setdefault("inference_url", policy_cfg["inference_url"])
     data["pipeline_metadata"] = existing_meta
-
+
     # Add trace_correlation_id to each trajectory (required for RL training pipeline)
     if "trajectories" in data:
+        normalized_trajs: list[dict[str, Any]] = []
         for traj in data.get("trajectories", []):
-            if isinstance(traj,
-
+            if isinstance(traj, BaseModel):
+                traj_dict = traj.model_dump()
+            elif isinstance(traj, dict):
+                traj_dict = dict(traj)
+            else:
+                continue
+            traj_dict["trace_correlation_id"] = final_cid
+            if not traj_dict.get("inference_url"):
+                inferred_url = policy_cfg.get("inference_url")
+                if inferred_url:
+                    traj_dict["inference_url"] = inferred_url
+            normalized_trajs.append(traj_dict)
+        if normalized_trajs:
+            data["trajectories"] = normalized_trajs
+        logger.info(
+            "ROLLOUT_EXEC: normalized trajectory sample run_id=%s inference_url=%s",
+            request.run_id,
+            normalized_trajs[0].get("inference_url") if normalized_trajs else None,
+        )
     logger.info(
         "ROLLOUT_EXEC: final pipeline metadata run_id=%s metadata=%s",
         request.run_id,
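For context, after this normalization each trajectory entry is a plain dict carrying the correlation fields the RL training pipeline expects. A hedged sketch of the resulting shape, with placeholder values only:

```python
# Illustrative shape only; values are placeholders, and real trajectories
# carry additional rollout fields (steps, rewards, etc.) passed through unchanged.
normalized_trajectory = {
    "trace_correlation_id": "trace_corr_abc123",          # hypothetical id
    "inference_url": "https://inference.example.com/v1",  # backfilled from policy config if absent
}
```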
@@ -847,6 +901,12 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
         request.run_id,
         existing_meta,
     )
+
+    if data.get("trace") is None:
+        raise HTTPException(
+            status_code=500,
+            detail="trace_payload_missing: task app did not emit a SessionTrace",
+        )
 
     # ASSERTION: Verify trace_correlation_id is present in response at all required levels
     assert "trace_correlation_id" in data, (
@@ -3,7 +3,7 @@
 This module now delegates to the TaskAppConfig defined in the colocated example at
 `examples/task_apps/crafter/task_app/grpo_crafter.py`. It is kept for legacy usage
 (running the file directly or targeting `fastapi_app` from external tooling). Prefer using
-`uvx synth-ai
+`uvx synth-ai deploy --runtime uvicorn grpo-crafter` for local development and testing.
 """
 
 from __future__ import annotations
@@ -59,6 +59,13 @@ class CrafterPolicy(Policy):
         self.trajectory_history: list[dict[str, Any]] = []  # env/policy step records
 
     async def initialize(self, config: dict[str, Any]) -> None:
+        # DEBUG: Log the incoming config
+        import logging
+        _logger = logging.getLogger(__name__)
+        _logger.debug(f"🔊 [POLICY_INIT] Received config keys: {list(config.keys())}")
+        _logger.debug(f"🔊 [POLICY_INIT] use_vision in config: {'use_vision' in config}, value: {config.get('use_vision')}")
+        _logger.debug(f"🔊 [POLICY_INIT] image_only_mode in config: {'image_only_mode' in config}, value: {config.get('image_only_mode')}")
+
         if "inference_url" in config:
             self.inference_url = config["inference_url"]
         if "model" in config:
@@ -67,6 +74,7 @@ class CrafterPolicy(Policy):
             self.use_tools = bool(config["use_tools"])
         if "use_vision" in config:
             self.use_vision = bool(config["use_vision"])
+            _logger.debug(f"🔊 [POLICY_INIT] Set use_vision={self.use_vision} from config")
         if "image_only_mode" in config:
             self.image_only_mode = bool(config["image_only_mode"])
             # If image_only_mode is enabled, automatically enable vision
@@ -97,6 +105,9 @@ class CrafterPolicy(Policy):
         self.history_messages = []
         self.turn_index = 0
         self.trajectory_history = []
+
+        # DEBUG: Log final state
+        _logger.debug(f"🔊 [POLICY_INIT] FINAL STATE: use_vision={self.use_vision}, image_only_mode={self.image_only_mode}, model={self.model}")
 
     def _append_user_observation(self, observation_text: str) -> None:
         self.history_messages.append({"role": "user", "content": observation_text})
@@ -131,10 +142,36 @@ class CrafterPolicy(Policy):
             history=history,
             turn=turn,
             image_parts=image_parts,
+            image_only_mode=self.image_only_mode,
         )
+
+        # DEBUG: Log message structure
+        import logging
+        _logger = logging.getLogger(__name__)
+        _logger.debug(f"🔊 [BUILD_REQUEST] Built {len(messages)} messages")
+        for idx, msg in enumerate(messages):
+            role = msg.get("role")
+            content = msg.get("content")
+            if isinstance(content, list):
+                _logger.debug(f"🔊 [BUILD_REQUEST] Message[{idx}] role={role}, content=list[{len(content)}]")
+                for part_idx, part in enumerate(content):
+                    if isinstance(part, dict):
+                        part_type = part.get("type")
+                        _logger.debug(f"🔊 [BUILD_REQUEST] Part[{part_idx}]: type={part_type}")
+            else:
+                content_len = len(str(content)) if content else 0
+                _logger.debug(f"🔊 [BUILD_REQUEST] Message[{idx}] role={role}, content_len={content_len}")
+
         payload: dict[str, Any] = {
             "messages": messages,
         }
+
+        # DEBUG: Verify messages are in payload correctly
+        _logger.debug(f"🔊 [BUILD_REQUEST_PAYLOAD] Created payload with {len(payload['messages'])} messages")
+        for idx, msg in enumerate(payload["messages"]):
+            content = msg.get("content")
+            _logger.debug(f"🔊 [BUILD_REQUEST_PAYLOAD] Payload message[{idx}]: type={type(content).__name__}, is_list={isinstance(content, list)}, len={len(content) if isinstance(content, list) else len(str(content)) if content else 0}")
+
         if self.model is not None:
             payload["model"] = self.model
         # Thinking controls
@@ -160,6 +197,8 @@ class CrafterPolicy(Policy):
         if self.use_tools:
             payload["tools"] = TOOLS_SCHEMA
             payload["tool_choice"] = "required"
+            payload["function_call"] = {"name": "interact_many"}
+            payload["parallel_tool_calls"] = False
             # Ensure the inference server injects family-specific stop sequences
             # to terminate immediately after the first tool call for compliance.
             payload["stop_after_tool_calls"] = 1
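Roughly, with tools enabled the policy's inference request now carries all of these enforcement fields together. The sketch below is illustrative only: the "tools" entry stands in for the repo's TOOLS_SCHEMA, and the comments are interpretation rather than documented behavior.

```python
# Illustrative sketch of the tool-enforcement fields set above; the "tools"
# entry is a placeholder, not the real TOOLS_SCHEMA from the repo.
payload = {
    "messages": [{"role": "user", "content": "..."}],
    "tools": [{"type": "function", "function": {"name": "interact_many"}}],  # placeholder schema
    "tool_choice": "required",
    "function_call": {"name": "interact_many"},  # presumably a legacy-style hint for older servers
    "parallel_tool_calls": False,                # one tool call per turn
    "stop_after_tool_calls": 1,                  # stripped later for OpenAI/Groq targets (see the client hunk below)
}
```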
@@ -170,13 +209,7 @@ class CrafterPolicy(Policy):
         response: dict[str, Any],
         use_tools: bool = True,
     ) -> list[dict[str, Any]]:
-        """Turn an inference response into environment tool calls.
-
-        - If tools were used, expect tool_calls-compatible output and forward as-is
-          in our simple JSON format: {"tool_name": str, "arguments": {...}}.
-        - If no tools, parse plain-text actions using CrafterReActAgent parser and
-          wrap them into a single interact_many tool call.
-        """
+        """Turn an inference response into environment tool calls."""
         # First check if we got actual tool calls
         choices = response.get("choices", [])
         tool_calls: list[dict[str, Any]] = []
@@ -235,24 +268,6 @@ class CrafterPolicy(Policy):
                 normalized.append(tc)
             return normalized
 
-        # Otherwise, parse plain text content for actions
-        text = ""
-        for choice in choices:
-            msg = choice.get("message", {})
-            content = msg.get("content", "")
-            if content:
-                text = content
-                break
-
-        if text:
-            # Try to parse actions from the text
-            from .shared import parse_actions
-
-            actions = parse_actions(text)
-            if actions:
-                # Wrap actions in interact_many tool call
-                return [{"tool_name": "interact_many", "arguments": {"actions": actions}}]
-
         # No actions found
         return []
 
@@ -360,7 +375,18 @@ class CrafterPolicy(Policy):
             raw_candidate = metadata.get("raw_observation")
             if isinstance(raw_candidate, dict):
                 raw_observation = raw_candidate
+
+        # DEBUG: Log image extraction
+        import logging
+        _logger = logging.getLogger(__name__)
+        _logger.debug(f"🔊 [POLICY] use_vision={self.use_vision}, has_raw_obs={raw_observation is not None}")
+        if raw_observation:
+            obs = raw_observation.get("observation", raw_observation)
+            data_url = obs.get("observation_image_data_url") if isinstance(obs, dict) else None
+            _logger.debug(f"🔊 [POLICY] has_data_url={data_url is not None}, url_preview={data_url[:50] if data_url else 'NONE'}...")
+
         image_parts = self._extract_image_parts(raw_observation)
+        _logger.debug(f"🔊 [POLICY] Extracted {len(image_parts)} image parts")
 
         payload = self.build_inference_request(
             combined_text,
@@ -368,7 +394,17 @@ class CrafterPolicy(Policy):
             turn=self.turn_index,
             image_parts=image_parts,
         )
-
+
+        # DEBUG: Verify payload before returning
+        _logger.debug(f"🔊 [POLICY_STEP_RETURN] About to return payload with {len(payload.get('messages', []))} messages")
+        for idx, msg in enumerate(payload.get("messages", [])):
+            content = msg.get("content")
+            _logger.debug(f"🔊 [POLICY_STEP_RETURN] Return message[{idx}]: type={type(content).__name__}, is_list={isinstance(content, list)}")
+            if isinstance(content, list):
+                _logger.debug(f"🔊 [POLICY_STEP_RETURN] Content list has {len(content)} items")
+                # Add assertion to catch corruption early
+                assert len(content) > 0, f"Message content list is empty! This should contain images."
+
         meta_out = {
             "inference_url": self.inference_url,
             "inference_request": payload,
@@ -484,7 +520,7 @@ class CrafterPolicy(Policy):
             "claude-3",  # All Claude 3 models support vision
             "gemini",  # Gemini models
             "qwen-vl",  # Qwen Vision-Language models
-            "
+            "qwen3-vl",  # Qwen3 VL
             "pixtral",  # Mistral's vision model
             "llava",  # LLaVA models
             "phi-3-vision",  # Microsoft Phi-3 Vision
@@ -45,8 +45,7 @@ class CrafterReActAgent:
             "Action policy:\n"
             "- Always return a single tool call: interact_many({actions: [...]})\n"
             "- Use 2–5 actions per call; prefer long movement sequences to explore.\n"
-            "- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n"
-            "- Do not spam the same exact sequence twice in a row—explore in varied directions.\n\n"
+            "- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n\n"
             "Available actions: noop, move_up, move_down, move_left, move_right, do (interact), sleep, "
             "place_stone, place_table, place_furnace, place_plant, make_wood_pickaxe, make_stone_pickaxe, "
             "make_iron_pickaxe, make_wood_sword, make_stone_sword, make_iron_sword\n"
@@ -50,20 +50,19 @@ class OpenAIClient:
         # Make a copy to avoid modifying the original
         fixed_request = request.copy()
 
-        # Determine if target is OpenAI-compatible (OpenAI, Azure OpenAI
-        #
+        # Determine if target is OpenAI-compatible (OpenAI, Azure OpenAI).
+        # Groq shares the API surface but we keep tool enforcement fields intact.
         is_openai = False
+        is_groq = False
         try:
             if isinstance(target_url, str):
                 low = target_url.lower()
-
-
-
-
-
-
-                    or ("/proxy/openai" in low)
-                )
+                if "groq.com" in low or "/proxy/groq" in low:
+                    is_groq = True
+                elif ("openai.com" in low) or ("azure" in low and ".openai." in low) or (
+                    "/proxy/openai" in low
+                ):
+                    is_openai = True
         except Exception:
             is_openai = False
 
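To make the new routing concrete, a standalone sketch of the same classification predicate follows. The endpoint URLs are made up for illustration and are not taken from the package.

```python
# Sketch of the URL classification introduced above, with made-up endpoint URLs.
def classify(target_url: str) -> str:
    low = target_url.lower()
    if "groq.com" in low or "/proxy/groq" in low:
        return "groq"
    if ("openai.com" in low) or ("azure" in low and ".openai." in low) or ("/proxy/openai" in low):
        return "openai"
    return "other"

print(classify("https://api.groq.com/openai/v1/chat/completions"))   # groq
print(classify("https://api.openai.com/v1/chat/completions"))        # openai
print(classify("https://my-vllm.example.com/v1/chat/completions"))   # other
```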
@@ -218,8 +217,20 @@ class OpenAIClient:
             # Do NOT fall back silently; surface the error so callers fail fast
             raise
 
+        # DEBUG: Log request BEFORE _fix_model_parameters
+        logger.debug(f"🔊 [OPENAI_CLIENT_PRE_FIX] Request message[1] content type: {type(request.get('messages', [])[1].get('content') if len(request.get('messages', [])) > 1 else None)}")
+        if len(request.get("messages", [])) > 1:
+            msg1_content = request["messages"][1].get("content")
+            logger.debug(f"🔊 [OPENAI_CLIENT_PRE_FIX] Message[1] content value: {msg1_content if not isinstance(msg1_content, list) else f'list[{len(msg1_content)}]'}")
+
         # Fix parameter compatibility for newer models
         processed_request = self._fix_model_parameters(request, target_url=url)
+
+        # DEBUG: Log request AFTER _fix_model_parameters
+        logger.debug(f"🔊 [OPENAI_CLIENT_POST_FIX] Processed message[1] content type: {type(processed_request.get('messages', [])[1].get('content') if len(processed_request.get('messages', [])) > 1 else None)}")
+        if len(processed_request.get("messages", [])) > 1:
+            msg1_content_post = processed_request["messages"][1].get("content")
+            logger.debug(f"🔊 [OPENAI_CLIENT_POST_FIX] Message[1] content value: {msg1_content_post if not isinstance(msg1_content_post, list) else f'list[{len(msg1_content_post)}]'}")
 
         # Log request (redact messages in production)
         logger.info(f"Inference POST target: {url}")
@@ -228,14 +239,32 @@ class OpenAIClient:
         with contextlib.suppress(Exception):
             keys_preview = sorted(processed_request.keys())
             logger.info(f"Request keys: {keys_preview}")
-
-
+            # DEBUG: Log message structure for vision debugging
+            if "messages" in processed_request:
+                msgs = processed_request["messages"]
+                if isinstance(msgs, list):
+                    logger.debug(f"🔊 [OPENAI_CLIENT] Request has {len(msgs)} messages")
+                    for idx, msg in enumerate(msgs):
+                        if isinstance(msg, dict):
+                            role = msg.get("role")
+                            content = msg.get("content")
+                            if isinstance(content, list):
+                                logger.debug(f"🔊 [OPENAI_CLIENT] Message[{idx}] role={role}, content=list[{len(content)}]")
+                                for part_idx, part in enumerate(content):
+                                    if isinstance(part, dict):
+                                        part_type = part.get("type")
+                                        logger.debug(f"🔊 [OPENAI_CLIENT] Part[{part_idx}]: type={part_type}")
+                            else:
+                                content_len = len(str(content)) if content else 0
+                                logger.debug(f"🔊 [OPENAI_CLIENT] Message[{idx}] role={role}, content_type={type(content).__name__}, len={content_len}")
+
+        # Final hard-guard for OpenAI/Groq: drop unsupported field
         try:
-
+            low_url = url.lower()
+            if ("openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url) and "stop_after_tool_calls" in processed_request:
                 processed_request.pop("stop_after_tool_calls", None)
-                logger.info("Removed stop_after_tool_calls for
+                logger.info("Removed stop_after_tool_calls for %s request", "Groq/OpenAI")
             # Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
-            low_url = url.lower()
             if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
                 processed_request, dict
             ):
@@ -516,47 +545,16 @@ class OpenAIClient:
                         error_block.get("code") or error_block.get("type") or ""
                     ).lower()
                     if error_code in {"tool_use_failed", "tool_call_failed"}:
-                        logger.
+                        logger.error(
                             {
                                 "tool_use_failed": True,
                                 "target": (base_url or self.base_url),
                                 "message": error_block.get("message") if isinstance(error_block, dict) else None,
                             }
                         )
-
-
-
-                            "object": "chat.completion",
-                            "created": int(time.time()),
-                            "model": processed_request.get("model"),
-                            "choices": [
-                                {
-                                    "index": 0,
-                                    "message": {
-                                        "role": "assistant",
-                                        "content": "",
-                                        "tool_calls": [
-                                            {
-                                                "id": f"call_fallback_{int(time.time() * 1000)}",
-                                                "type": "function",
-                                                "function": {
-                                                    "name": "interact_many",
-                                                    "arguments": json.dumps(
-                                                        {"actions": fallback_actions}
-                                                    ),
-                                                },
-                                            }
-                                        ],
-                                    },
-                                    "finish_reason": "tool_calls",
-                                }
-                            ],
-                        }
-                        if isinstance(response_data.get("usage"), dict):
-                            fallback_response["usage"] = response_data["usage"]
-                        if isinstance(error_block, dict):
-                            fallback_response["error"] = error_block
-                        return fallback_response
+                        raise RuntimeError(
+                            f"Inference 400 response (tool call failed): {error_block.get('message') if isinstance(error_block, dict) else 'Tool call failed'}"
+                        ) from e
                     # This is a different type of 400 error, don't retry
                     try:
                         redacted_headers = {}