synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of synth-ai might be problematic.

Files changed (349)
  1. examples/__init__.py +16 -0
  2. examples/crafter_debug_render.py +23 -17
  3. examples/dev/qwen3_32b_qlora_4xh100.toml +40 -0
  4. examples/multi_step/crafter_rl_lora.md +29 -0
  5. examples/qwen_coder/README.md +102 -0
  6. examples/qwen_coder/_shared.py +113 -0
  7. examples/qwen_coder/configs/coder_lora_30b.toml +61 -0
  8. examples/qwen_coder/configs/coder_lora_4b.toml +57 -0
  9. examples/qwen_coder/configs/coder_lora_small.toml +58 -0
  10. examples/qwen_coder/generate_dataset.py +98 -0
  11. examples/qwen_coder/infer_ft_smoke.py +65 -0
  12. examples/qwen_coder/infer_prod_proxy.py +73 -0
  13. examples/qwen_coder/infer_via_synth.py +87 -0
  14. examples/qwen_coder/scripts/infer_coder.sh +19 -0
  15. examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
  16. examples/qwen_coder/sft_full_17b.py +103 -0
  17. examples/qwen_coder/sft_lora_30b.py +110 -0
  18. examples/qwen_coder/subset_jsonl.py +39 -0
  19. examples/qwen_coder/todos.md +38 -0
  20. examples/qwen_coder/validate_jsonl.py +60 -0
  21. examples/rl/configs/eval_base_qwen.toml +1 -1
  22. examples/rl/configs/rl_from_base_qwen17.toml +1 -1
  23. examples/rl/download_dataset.py +26 -10
  24. examples/rl/run_eval.py +53 -52
  25. examples/rl/run_rl_and_save.py +29 -12
  26. examples/rl/task_app/math_single_step.py +180 -41
  27. examples/rl/task_app/math_task_app.py +14 -6
  28. examples/sft/README.md +139 -0
  29. examples/sft/configs/crafter_fft_qwen0p6b.toml +44 -0
  30. examples/sft/configs/crafter_lora_qwen0p6b.toml +45 -0
  31. examples/sft/evaluate.py +117 -0
  32. examples/sft/export_dataset.py +117 -0
  33. examples/sft/generate_traces.py +162 -0
  34. examples/swe/__init__.py +12 -0
  35. examples/swe/task_app/README.md +105 -0
  36. examples/swe/task_app/__init__.py +2 -0
  37. examples/swe/task_app/grpo_swe_mini.py +571 -0
  38. examples/swe/task_app/grpo_swe_mini_task_app.py +136 -0
  39. examples/swe/task_app/hosted/README.md +173 -0
  40. examples/swe/task_app/hosted/__init__.py +5 -0
  41. examples/swe/task_app/hosted/branching.py +143 -0
  42. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  43. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  44. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  45. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  46. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  47. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  48. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  49. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  50. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  51. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  52. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1164 -0
  53. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  54. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  55. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  56. examples/swe/task_app/hosted/hosted_app.py +204 -0
  57. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  58. examples/swe/task_app/hosted/inference/openai_client.py +618 -0
  59. examples/swe/task_app/hosted/main.py +100 -0
  60. examples/swe/task_app/hosted/policy_routes.py +1079 -0
  61. examples/swe/task_app/hosted/registry.py +195 -0
  62. examples/swe/task_app/hosted/rollout.py +1869 -0
  63. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  64. examples/swe/task_app/hosted/storage/volume.py +211 -0
  65. examples/swe/task_app/hosted/test_agents.py +161 -0
  66. examples/swe/task_app/hosted/test_service.py +137 -0
  67. examples/swe/task_app/hosted/utils.py +62 -0
  68. examples/vlm/PROPOSAL.md +53 -0
  69. examples/vlm/README.md +68 -0
  70. examples/vlm/configs/crafter_vlm_gpt4o.toml +44 -0
  71. examples/vlm/crafter_image_only_agent.py +207 -0
  72. examples/vlm/crafter_openai_vlm_agent.py +277 -0
  73. examples/vlm/filter_image_rows.py +63 -0
  74. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  75. examples/warming_up_to_rl/analyze_trace_db.py +12 -10
  76. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +11 -1
  77. examples/warming_up_to_rl/export_trace_sft.py +218 -36
  78. examples/warming_up_to_rl/groq_test.py +15 -8
  79. examples/warming_up_to_rl/manage_secrets.py +29 -25
  80. examples/warming_up_to_rl/readme.md +9 -2
  81. examples/warming_up_to_rl/run_eval.py +137 -61
  82. examples/warming_up_to_rl/run_fft_and_save.py +131 -60
  83. examples/warming_up_to_rl/run_local_rollout.py +88 -39
  84. examples/warming_up_to_rl/run_local_rollout_modal.py +114 -28
  85. examples/warming_up_to_rl/run_local_rollout_parallel.py +81 -20
  86. examples/warming_up_to_rl/run_local_rollout_traced.py +126 -23
  87. examples/warming_up_to_rl/run_rl_and_save.py +35 -12
  88. examples/warming_up_to_rl/run_rollout_remote.py +44 -19
  89. examples/warming_up_to_rl/task_app/README.md +6 -2
  90. examples/warming_up_to_rl/task_app/grpo_crafter.py +319 -57
  91. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +11 -30
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +1 -1
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +9 -11
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +137 -182
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +150 -57
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +105 -69
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +19 -7
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +45 -42
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +47 -45
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
  105. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +198 -92
  106. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -2
  107. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +361 -263
  108. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +21 -23
  109. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +394 -274
  110. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
  111. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +56 -62
  112. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +1 -0
  113. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +6 -15
  114. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +4 -3
  115. synth_ai/__init__.py +1 -0
  116. synth_ai/api/models/supported.py +376 -0
  117. synth_ai/api/train/builders.py +157 -26
  118. synth_ai/api/train/cli.py +213 -57
  119. synth_ai/api/train/config_finder.py +65 -5
  120. synth_ai/api/train/env_resolver.py +33 -15
  121. synth_ai/api/train/pollers.py +13 -4
  122. synth_ai/api/train/supported_algos.py +139 -0
  123. synth_ai/api/train/task_app.py +5 -3
  124. synth_ai/api/train/utils.py +33 -48
  125. synth_ai/cli/__init__.py +19 -4
  126. synth_ai/cli/_modal_wrapper.py +28 -0
  127. synth_ai/cli/_typer_patch.py +49 -0
  128. synth_ai/cli/balance.py +2 -3
  129. synth_ai/cli/calc.py +1 -1
  130. synth_ai/cli/demo.py +21 -6
  131. synth_ai/cli/recent.py +2 -2
  132. synth_ai/cli/rl_demo.py +77 -17
  133. synth_ai/cli/root.py +116 -39
  134. synth_ai/cli/status.py +2 -2
  135. synth_ai/cli/task_apps.py +1699 -259
  136. synth_ai/cli/traces.py +7 -4
  137. synth_ai/cli/turso.py +73 -0
  138. synth_ai/cli/watch.py +12 -18
  139. synth_ai/core/experiment.py +0 -2
  140. synth_ai/demo_registry.py +68 -31
  141. synth_ai/demos/core/cli.py +516 -194
  142. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  143. synth_ai/demos/demo_task_apps/core.py +64 -28
  144. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
  145. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +37 -30
  146. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  147. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  148. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  149. synth_ai/demos/demo_task_apps/math/modal_task_app.py +183 -82
  150. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
  151. synth_ai/environments/examples/bandit/engine.py +12 -4
  152. synth_ai/environments/examples/bandit/taskset.py +4 -4
  153. synth_ai/environments/examples/crafter_classic/environment.py +76 -1
  154. synth_ai/environments/reproducibility/tree.py +5 -6
  155. synth_ai/environments/service/app.py +11 -12
  156. synth_ai/environments/service/core_routes.py +10 -9
  157. synth_ai/environments/stateful/engine.py +1 -1
  158. synth_ai/environments/tasks/core.py +1 -0
  159. synth_ai/environments/tasks/filters.py +5 -6
  160. synth_ai/environments/tasks/utils.py +4 -5
  161. synth_ai/evals/base.py +0 -2
  162. synth_ai/handshake.py +11 -9
  163. synth_ai/http.py +1 -1
  164. synth_ai/http_client.py +43 -11
  165. synth_ai/inference/__init__.py +0 -2
  166. synth_ai/inference/client.py +20 -6
  167. synth_ai/jobs/client.py +103 -78
  168. synth_ai/learning/__init__.py +41 -6
  169. synth_ai/learning/algorithms.py +14 -0
  170. synth_ai/learning/client.py +121 -29
  171. synth_ai/learning/config.py +2 -40
  172. synth_ai/learning/constants.py +0 -2
  173. synth_ai/learning/ft_client.py +4 -56
  174. synth_ai/learning/health.py +13 -7
  175. synth_ai/learning/jobs.py +43 -47
  176. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  177. synth_ai/learning/rl/client.py +267 -0
  178. synth_ai/learning/rl/config.py +31 -0
  179. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  180. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  181. synth_ai/learning/rl/secrets.py +13 -0
  182. synth_ai/learning/rl_client.py +2 -253
  183. synth_ai/learning/sft/__init__.py +29 -0
  184. synth_ai/learning/sft/client.py +68 -0
  185. synth_ai/learning/sft/config.py +270 -0
  186. synth_ai/learning/sft/data.py +295 -0
  187. synth_ai/learning/sse.py +25 -26
  188. synth_ai/learning/validators.py +25 -24
  189. synth_ai/lm/__init__.py +21 -47
  190. synth_ai/task/__init__.py +26 -27
  191. synth_ai/task/apps/__init__.py +18 -19
  192. synth_ai/task/auth.py +35 -23
  193. synth_ai/task/client.py +15 -13
  194. synth_ai/task/contracts.py +37 -35
  195. synth_ai/task/datasets.py +9 -6
  196. synth_ai/task/errors.py +11 -10
  197. synth_ai/task/health.py +17 -11
  198. synth_ai/task/json.py +58 -24
  199. synth_ai/task/proxy.py +15 -14
  200. synth_ai/task/rubrics.py +22 -15
  201. synth_ai/task/server.py +43 -17
  202. synth_ai/task/tracing_utils.py +12 -7
  203. synth_ai/task/validators.py +0 -1
  204. synth_ai/task/vendors.py +5 -7
  205. synth_ai/tracing_v3/__init__.py +2 -0
  206. synth_ai/tracing_v3/abstractions.py +21 -4
  207. synth_ai/tracing_v3/db_config.py +26 -1
  208. synth_ai/tracing_v3/decorators.py +18 -15
  209. synth_ai/tracing_v3/examples/basic_usage.py +3 -2
  210. synth_ai/tracing_v3/hooks.py +6 -4
  211. synth_ai/tracing_v3/llm_call_record_helpers.py +6 -6
  212. synth_ai/tracing_v3/replica_sync.py +1 -0
  213. synth_ai/tracing_v3/session_tracer.py +63 -16
  214. synth_ai/tracing_v3/storage/base.py +89 -1
  215. synth_ai/tracing_v3/storage/config.py +21 -8
  216. synth_ai/tracing_v3/storage/factory.py +10 -8
  217. synth_ai/tracing_v3/storage/utils.py +4 -2
  218. synth_ai/tracing_v3/turso/daemon.py +7 -2
  219. synth_ai/tracing_v3/turso/models.py +5 -2
  220. synth_ai/tracing_v3/turso/native_manager.py +1173 -0
  221. synth_ai/tracing_v3/utils.py +4 -3
  222. synth_ai/v0/api/__init__.py +8 -0
  223. synth_ai/v0/api/models/__init__.py +8 -0
  224. synth_ai/v0/api/models/supported.py +8 -0
  225. synth_ai/v0/config/__init__.py +15 -0
  226. synth_ai/v0/config/base_url.py +12 -0
  227. synth_ai/v0/lm/__init__.py +51 -0
  228. synth_ai/{lm → v0/lm}/caching/ephemeral.py +3 -5
  229. synth_ai/{lm → v0/lm}/caching/handler.py +4 -4
  230. synth_ai/{lm → v0/lm}/caching/initialize.py +1 -1
  231. synth_ai/{lm → v0/lm}/caching/persistent.py +1 -1
  232. synth_ai/{lm → v0/lm}/config.py +6 -1
  233. synth_ai/{lm → v0/lm}/core/all.py +9 -9
  234. synth_ai/{lm → v0/lm}/core/exceptions.py +0 -2
  235. synth_ai/{lm → v0/lm}/core/main.py +19 -7
  236. synth_ai/{lm → v0/lm}/core/main_v3.py +10 -10
  237. synth_ai/{lm → v0/lm}/core/synth_models.py +2 -15
  238. synth_ai/{lm → v0/lm}/core/vendor_clients.py +6 -4
  239. synth_ai/{lm → v0/lm}/overrides.py +4 -4
  240. synth_ai/{lm → v0/lm}/provider_support/anthropic.py +4 -4
  241. synth_ai/{lm → v0/lm}/provider_support/openai.py +5 -5
  242. synth_ai/{lm → v0/lm}/structured_outputs/handler.py +5 -5
  243. synth_ai/{lm → v0/lm}/structured_outputs/rehabilitate.py +1 -1
  244. synth_ai/{lm → v0/lm}/vendors/core/anthropic_api.py +16 -16
  245. synth_ai/{lm → v0/lm}/vendors/core/gemini_api.py +5 -5
  246. synth_ai/{lm → v0/lm}/vendors/core/mistral_api.py +5 -5
  247. synth_ai/{lm → v0/lm}/vendors/core/openai_api.py +12 -10
  248. synth_ai/{lm → v0/lm}/vendors/openai_standard.py +11 -9
  249. synth_ai/{lm → v0/lm}/vendors/openai_standard_responses.py +8 -5
  250. synth_ai/{lm → v0/lm}/vendors/supported/custom_endpoint.py +4 -6
  251. synth_ai/{lm → v0/lm}/vendors/supported/deepseek.py +2 -2
  252. synth_ai/{lm → v0/lm}/vendors/supported/grok.py +2 -2
  253. synth_ai/{lm → v0/lm}/vendors/supported/groq.py +1 -1
  254. synth_ai/{lm → v0/lm}/vendors/supported/ollama.py +1 -1
  255. synth_ai/{lm → v0/lm}/vendors/supported/openrouter.py +3 -3
  256. synth_ai/{lm → v0/lm}/vendors/supported/together.py +1 -1
  257. synth_ai/{lm → v0/lm}/vendors/synth_client.py +38 -11
  258. synth_ai/v0/tracing/upload.py +32 -135
  259. synth_ai/v0/tracing_v3/__init__.py +10 -0
  260. synth_ai/v0/tracing_v3/abstractions.py +3 -0
  261. synth_ai/v0/tracing_v3/decorators.py +3 -0
  262. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +3 -0
  263. synth_ai/v0/tracing_v3/session_tracer.py +3 -0
  264. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/METADATA +10 -7
  265. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/RECORD +294 -258
  266. examples/common_old/backend.py +0 -21
  267. examples/evals_old/README.md +0 -98
  268. examples/evals_old/__init__.py +0 -6
  269. examples/evals_old/compare_models.py +0 -1037
  270. examples/evals_old/example_log.md +0 -145
  271. examples/evals_old/run_demo.sh +0 -126
  272. examples/evals_old/trace_analysis.py +0 -270
  273. examples/finetuning_old/_backup_synth_qwen/config.toml +0 -29
  274. examples/finetuning_old/_backup_synth_qwen/example_log.md +0 -324
  275. examples/finetuning_old/_backup_synth_qwen/filter_traces.py +0 -60
  276. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +0 -239
  277. examples/finetuning_old/_backup_synth_qwen/purge_v3_traces.py +0 -109
  278. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +0 -1924
  279. examples/finetuning_old/_backup_synth_qwen/readme.md +0 -49
  280. examples/finetuning_old/_backup_synth_qwen/run_crafter_qwen4b.py +0 -114
  281. examples/finetuning_old/_backup_synth_qwen/run_demo.sh +0 -195
  282. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +0 -118
  283. examples/finetuning_old/synth_qwen_v1/README.md +0 -68
  284. examples/finetuning_old/synth_qwen_v1/filter_traces.py +0 -60
  285. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +0 -239
  286. examples/finetuning_old/synth_qwen_v1/finetune.py +0 -46
  287. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +0 -71
  288. examples/finetuning_old/synth_qwen_v1/infer.py +0 -37
  289. examples/finetuning_old/synth_qwen_v1/poll.py +0 -44
  290. examples/finetuning_old/synth_qwen_v1/prepare_data.py +0 -35
  291. examples/finetuning_old/synth_qwen_v1/purge_v3_traces.py +0 -109
  292. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +0 -1932
  293. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +0 -207
  294. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +0 -232
  295. examples/finetuning_old/synth_qwen_v1/upload_data.py +0 -34
  296. examples/finetuning_old/synth_qwen_v1/util.py +0 -147
  297. examples/rl_old/task_app.py +0 -962
  298. synth_ai/experimental/synth_oss.py +0 -446
  299. synth_ai/install_sqld.sh +0 -40
  300. synth_ai/learning/filtering.py +0 -0
  301. synth_ai/learning/offline/dpo.py +0 -0
  302. synth_ai/learning/offline/providers.py +0 -7
  303. synth_ai/learning/offline/sft.py +0 -0
  304. synth_ai/learning/offline/shared.py +0 -0
  305. synth_ai/learning/online/grpo.py +0 -0
  306. synth_ai/learning/online/irft.py +0 -0
  307. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  308. synth_ai/learning/prompts/gepa.py +0 -0
  309. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  310. synth_ai/learning/prompts/mipro.py +0 -289
  311. synth_ai/learning/prompts/random_search.py +0 -246
  312. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  313. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  314. synth_ai/rl/secrets.py +0 -19
  315. synth_ai/scripts/verify_rewards.py +0 -100
  316. synth_ai/tracing/__init__.py +0 -30
  317. synth_ai/tracing_v1/__init__.py +0 -33
  318. synth_ai/tracing_v3/turso/__init__.py +0 -25
  319. synth_ai/tracing_v3/turso/manager.py +0 -774
  320. synth_ai/zyk/__init__.py +0 -30
  321. /synth_ai/{lm → v0/lm}/caching/__init__.py +0 -0
  322. /synth_ai/{lm → v0/lm}/caching/constants.py +0 -0
  323. /synth_ai/{lm → v0/lm}/caching/dbs.py +0 -0
  324. /synth_ai/{lm → v0/lm}/constants.py +0 -0
  325. /synth_ai/{lm → v0/lm}/core/__init__.py +0 -0
  326. /synth_ai/{lm → v0/lm}/cost/__init__.py +0 -0
  327. /synth_ai/{lm → v0/lm}/cost/monitor.py +0 -0
  328. /synth_ai/{lm → v0/lm}/cost/statefulness.py +0 -0
  329. /synth_ai/{lm → v0/lm}/injection.py +0 -0
  330. /synth_ai/{lm → v0/lm}/provider_support/__init__.py +0 -0
  331. /synth_ai/{lm → v0/lm}/provider_support/suppress_logging.py +0 -0
  332. /synth_ai/{lm → v0/lm}/structured_outputs/__init__.py +0 -0
  333. /synth_ai/{lm → v0/lm}/structured_outputs/inject.py +0 -0
  334. /synth_ai/{lm → v0/lm}/tools/__init__.py +0 -0
  335. /synth_ai/{lm → v0/lm}/tools/base.py +0 -0
  336. /synth_ai/{lm → v0/lm}/unified_interface.py +0 -0
  337. /synth_ai/{lm → v0/lm}/vendors/__init__.py +0 -0
  338. /synth_ai/{lm → v0/lm}/vendors/base.py +0 -0
  339. /synth_ai/{lm → v0/lm}/vendors/core/__init__.py +0 -0
  340. /synth_ai/{lm → v0/lm}/vendors/core/synth_dev_api.py +0 -0
  341. /synth_ai/{lm → v0/lm}/vendors/local/__init__.py +0 -0
  342. /synth_ai/{lm → v0/lm}/vendors/local/ollama.py +0 -0
  343. /synth_ai/{lm → v0/lm}/vendors/retries.py +0 -0
  344. /synth_ai/{lm → v0/lm}/vendors/supported/__init__.py +0 -0
  345. /synth_ai/{lm → v0/lm}/warmup.py +0 -0
  346. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/WHEEL +0 -0
  347. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/entry_points.txt +0 -0
  348. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/licenses/LICENSE +0 -0
  349. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.10.dist-info}/top_level.txt +0 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py

@@ -1,8 +1,10 @@
  from __future__ import annotations
 
+ import contextlib
  import logging
+ import os
  from datetime import datetime
- from typing import Any, Dict, List, Optional
+ from typing import Any
 
  from fastapi import APIRouter, HTTPException, Request
  from pydantic import BaseModel
@@ -11,8 +13,6 @@ from .envs.crafter.policy import CrafterPolicy
  from .inference.openai_client import create_inference_client
  from .registry import registry
  from .storage.volume import storage
- import os
- from typing import Tuple
 
  # Token budgeting (shared logic with inference server)
  try:
@@ -34,10 +34,10 @@ router = APIRouter()
 
  class PolicyCreateRequest(BaseModel):
      policy_name: str
-     config: Dict[str, Any] = {}
-     parent_policy_id: Optional[str] = None
+     config: dict[str, Any] = {}
+     parent_policy_id: str | None = None
      rl_run_id: str
-     bound_env_id: Optional[str] = None
+     bound_env_id: str | None = None
 
 
  class PolicyCreateResponse(BaseModel):
@@ -46,15 +46,15 @@ class PolicyCreateResponse(BaseModel):
 
  class PolicyStepRequest(BaseModel):
      policy_id: str
-     observation: Dict[str, Any]
-     state: Optional[Dict[str, Any]] = None
-     metadata: Optional[Dict[str, Any]] = None
+     observation: dict[str, Any]
+     state: dict[str, Any] | None = None
+     metadata: dict[str, Any] | None = None
      dry_run: bool = False
 
 
  class PolicyStepResponse(BaseModel):
-     tool_calls: List[Dict[str, Any]]
-     meta: Dict[str, Any]
+     tool_calls: list[dict[str, Any]]
+     meta: dict[str, Any]
 
 
  class PolicySnapshotRequest(BaseModel):
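
The annotation changes here are the PEP 585/604 modernization applied throughout this release: built-in generics (dict[...], list[...]) and X | None unions replace typing.Dict/List/Optional, which requires Python 3.10+. A minimal sketch of the same style on a standalone Pydantic model (illustrative only, not part of the package):

    from typing import Any

    from pydantic import BaseModel


    class StepRequest(BaseModel):
        policy_id: str
        observation: dict[str, Any]            # was Dict[str, Any]
        state: dict[str, Any] | None = None    # was Optional[Dict[str, Any]]
        dry_run: bool = False
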
@@ -91,14 +91,23 @@ async def create_policy(
  ) -> PolicyCreateResponse:
      """Create a new policy instance."""
      try:
-         task_app = req.app.state.task_app
-
-         # Set defaults from TaskApp if not provided
-         config = request.config.copy()
-         if "inference_url" not in config:
-             config["inference_url"] = task_app.vllm_base_url
-         if "model" not in config and task_app.default_model:
-             config["model"] = task_app.default_model
+         task_app = getattr(req.app.state, "task_app", None)
+
+         # Set defaults from TaskApp / environment if not provided
+         config = dict(request.config or {})
+         if "inference_url" not in config and task_app is not None:
+             base_url = getattr(task_app, "vllm_base_url", None)
+             if base_url:
+                 config["inference_url"] = base_url
+         if "model" not in config and task_app is not None:
+             default_model = getattr(task_app, "default_model", None)
+             if default_model:
+                 config["model"] = default_model
+         if "inference_url" not in config or "model" not in config:
+             raise HTTPException(
+                 status_code=422,
+                 detail="Policy configuration must include 'inference_url' and 'model'.",
+             )
 
          # Create policy instance based on name
          pname = request.policy_name.lower()
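
Read as a small pure function, the new defaulting logic copies the request config, fills inference_url and model from optional task_app attributes, and fails fast when either is still missing, so a misconfigured request now surfaces as a 422 instead of an AttributeError turned into a 500. A sketch under those assumptions (resolve_policy_config is a hypothetical helper; ValueError stands in for the route's HTTPException):

    from typing import Any

    def resolve_policy_config(config: dict[str, Any] | None, task_app: object | None) -> dict[str, Any]:
        resolved = dict(config or {})
        # Fill defaults only when the attribute exists and is truthy.
        if "inference_url" not in resolved and task_app is not None:
            base_url = getattr(task_app, "vllm_base_url", None)
            if base_url:
                resolved["inference_url"] = base_url
        if "model" not in resolved and task_app is not None:
            default_model = getattr(task_app, "default_model", None)
            if default_model:
                resolved["model"] = default_model
        # Fail fast instead of letting a KeyError surface later.
        if "inference_url" not in resolved or "model" not in resolved:
            raise ValueError("Policy configuration must include 'inference_url' and 'model'.")
        return resolved
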
@@ -110,11 +119,13 @@
              await policy.initialize(config)
          elif pname in ["wordle-react", "wordle"]:
              try:
-                 from .envs.wordle.policy import WordlePolicy as _WordlePolicy
+                 from .envs.wordle.policy import WordlePolicy
              except Exception as e:
-                 raise HTTPException(status_code=500, detail=f"Wordle policy unavailable: {e}")
+                 raise HTTPException(
+                     status_code=500, detail=f"Wordle policy unavailable: {e}"
+                 ) from e
 
-             policy = _WordlePolicy(
+             policy = WordlePolicy(
                  inference_url=config["inference_url"],
                  model=config["model"],
                  word_length=int(config["word_length"]),
@@ -123,22 +134,24 @@
              await policy.initialize(config)
          elif pname in ["sokoban-react", "sokoban"]:
              try:
-                 from .envs.sokoban.policy import SokobanPolicy as _SokobanPolicy
+                 from .envs.sokoban.policy import SokobanPolicy
              except Exception as e:
-                 raise HTTPException(status_code=500, detail=f"Sokoban policy unavailable: {e}")
+                 raise HTTPException(
+                     status_code=500, detail=f"Sokoban policy unavailable: {e}"
+                 ) from e
 
-             policy = _SokobanPolicy(
+             policy = SokobanPolicy(
                  inference_url=config["inference_url"],
                  model=config["model"],
              )
              await policy.initialize(config)
          elif pname in ["math-react", "math"]:
              try:
-                 from .envs.math.policy import MathPolicy as _MathPolicy
+                 from .envs.math.policy import MathPolicy
              except Exception as e:
-                 raise HTTPException(status_code=500, detail=f"Math policy unavailable: {e}")
+                 raise HTTPException(status_code=500, detail=f"Math policy unavailable: {e}") from e
 
-             policy = _MathPolicy(
+             policy = MathPolicy(
                  inference_url=config["inference_url"],
                  model=config["model"],
              )
@@ -160,7 +173,7 @@
 
      except Exception as e:
          logger.error(f"Failed to create policy: {e}")
-         raise HTTPException(status_code=500, detail=str(e))
+         raise HTTPException(status_code=500, detail=str(e)) from e
 
 
  @router.post("/step", response_model=PolicyStepResponse)
@@ -171,140 +184,172 @@
      """Execute a policy step to generate actions."""
      handle = registry.get_policy(request.policy_id)
      if not handle:
-         raise HTTPException(
-             status_code=404, detail=f"Policy {request.policy_id} not found"
-         )
+         raise HTTPException(status_code=404, detail=f"Policy {request.policy_id} not found")
 
      try:
          task_app = req.app.state.task_app
          policy = handle.policy
          tracing_context = getattr(req.state, "rollout_tracing", None)
 
-         # Format observation text conditionally for each env
+         obs_text = request.observation
          if isinstance(request.observation, dict):
              if isinstance(policy, CrafterPolicy):
                  from .envs.crafter.shared import format_observation as format_crafter
 
                  obs_text = format_crafter(request.observation)
-             elif True:
+             else:
+                 formatted: str | None = None
+
+                 # Wordle formatting
                  try:
-                     from .envs.wordle.policy import WordlePolicy as _WordlePolicy
+                     from .envs.wordle.policy import WordlePolicy
                  except Exception:
-                     _WordlePolicy = None  # type: ignore
+                     wordle_policy_cls = None  # type: ignore[assignment]
+                 else:
+                     wordle_policy_cls = WordlePolicy
 
-                 if _WordlePolicy is not None and isinstance(policy, _WordlePolicy):
+                 if formatted is None and wordle_policy_cls is not None and isinstance(
+                     policy, wordle_policy_cls
+                 ):
                      from .envs.wordle.shared import format_observation_wordle
 
-                     # ASSERTION: Validate observation structure
-                     assert request.observation is not None, (
-                         "request.observation cannot be None"
-                     )
-                     assert isinstance(request.observation, dict), (
-                         f"request.observation must be dict, got {type(request.observation)}"
-                     )
+                     # ASSERTION: Validate observation structure
+                     assert request.observation is not None, "request.observation cannot be None"
+                     assert isinstance(request.observation, dict), (
+                         f"request.observation must be dict, got {type(request.observation)}"
+                     )
 
-                     # Required keys for Wordle observation
-                     required_keys = {
-                         "text",
-                         "status",
-                         "remaining_guesses",
-                         "guesses",
-                         "feedback",
-                         "reward_last",
-                         "total_reward",
-                         "terminated",
-                     }
-                     missing_keys = required_keys - set(request.observation.keys())
-                     assert not missing_keys, (
-                         f"Wordle observation missing required keys: {missing_keys}"
-                     )
+                     required_keys = {
+                         "text",
+                         "status",
+                         "remaining_guesses",
+                         "guesses",
+                         "feedback",
+                         "reward_last",
+                         "total_reward",
+                         "terminated",
+                     }
+                     missing_keys = required_keys - set(request.observation.keys())
+                     assert (
+                         not missing_keys
+                     ), f"Wordle observation missing required keys: {missing_keys}"
+
+                     print("DEBUG POLICY_ROUTES: About to format Wordle observation")
+                     print(f"DEBUG POLICY_ROUTES: Observation type: {type(request.observation)}")
+                     print(
+                         f"DEBUG POLICY_ROUTES: Observation keys: {list(request.observation.keys())}"
+                     )
+                     feedback_val = request.observation["feedback"]
+                     print(f"DEBUG POLICY_ROUTES: Observation feedback: {feedback_val}")
+                     print(
+                         f"DEBUG POLICY_ROUTES: Observation guesses: {request.observation['guesses']}"
+                     )
+                     print(
+                         "DEBUG POLICY_ROUTES: Observation text length: "
+                         f"{len(request.observation['text'])}"
+                     )
 
-                     print("DEBUG POLICY_ROUTES: About to format Wordle observation")
-                     print(
-                         f"DEBUG POLICY_ROUTES: Observation type: {type(request.observation)}"
-                     )
-                     print(
-                         f"DEBUG POLICY_ROUTES: Observation keys: {list(request.observation.keys())}"
-                     )
-                     feedback_val = request.observation["feedback"]
-                     print(f"DEBUG POLICY_ROUTES: Observation feedback: {feedback_val}")
-                     print(
-                         f"DEBUG POLICY_ROUTES: Observation guesses: {request.observation['guesses']}"
-                     )
-                     print(
-                         f"DEBUG POLICY_ROUTES: Observation text length: {len(request.observation['text'])}"
-                     )
+                     guesses = request.observation["guesses"]
+                     feedback = request.observation["feedback"]
+                     assert isinstance(guesses, list), f"guesses must be list, got {type(guesses)}"
+                     assert isinstance(
+                         feedback, list
+                     ), f"feedback must be list, got {type(feedback)}"
 
-                     # ASSERTION: Validate feedback data
-                     guesses = request.observation["guesses"]
-                     feedback = request.observation["feedback"]
-                     assert isinstance(guesses, list), (
-                         f"guesses must be list, got {type(guesses)}"
-                     )
-                     assert isinstance(feedback, list), (
-                         f"feedback must be list, got {type(feedback)}"
-                     )
-                     # Note: We don't assert equal lengths here since the environment is broken
+                     formatted = format_observation_wordle(request.observation)
 
-                     obs_text = format_observation_wordle(request.observation)
+                     assert isinstance(formatted, str), (
+                         f"obs_text must be string, got {type(formatted)}"
+                     )
+                     assert len(formatted) > 0, "obs_text cannot be empty"
+                     assert "WORDLE" in formatted, "obs_text must contain 'WORDLE' header"
+                     assert "Respond with a single tool call" in formatted, (
+                         "obs_text must contain instruction text"
+                     )
 
-                     # ASSERTION: Validate formatted output
-                     assert isinstance(obs_text, str), (
-                         f"obs_text must be string, got {type(obs_text)}"
-                     )
-                     assert len(obs_text) > 0, "obs_text cannot be empty"
-                     assert "WORDLE" in obs_text, "obs_text must contain 'WORDLE' header"
-                     assert "Respond with a single tool call" in obs_text, (
-                         "obs_text must contain instruction text"
-                     )
+                     print(
+                         f"DEBUG POLICY_ROUTES: Formatted obs_text length: {len(formatted)}"
+                     )
+                     print(
+                         "DEBUG POLICY_ROUTES: Formatted obs_text contains 🟩: "
+                         f"{'🟩' in formatted}"
+                     )
+                     print(
+                         "DEBUG POLICY_ROUTES: Formatted obs_text contains 🟨: "
+                         f"{'🟨' in formatted}"
+                     )
+                     print(
+                         "DEBUG POLICY_ROUTES: Formatted obs_text contains ⬛: "
+                         f"{'⬛' in formatted}"
+                     )
+                     print(
+                         "DEBUG POLICY_ROUTES: Formatted obs_text first 200 chars: "
+                         f"{formatted[:200]}"
+                     )
 
-                     print(
-                         f"DEBUG POLICY_ROUTES: Formatted obs_text length: {len(obs_text)}"
-                     )
-                     print(
-                         f"DEBUG POLICY_ROUTES: Formatted obs_text contains 🟩: {'🟩' in obs_text}"
-                     )
-                     print(
-                         f"DEBUG POLICY_ROUTES: Formatted obs_text contains 🟨: {'🟨' in obs_text}"
-                     )
-                     print(
-                         f"DEBUG POLICY_ROUTES: Formatted obs_text contains ⬛: {'⬛' in obs_text}"
-                     )
-                     print(
-                         f"DEBUG POLICY_ROUTES: Formatted obs_text first 200 chars: {obs_text[:200]}"
-                     )
-                 elif True:
+                 # Sokoban formatting
                  try:
-                     from .envs.sokoban.policy import SokobanPolicy as _SokobanPolicy
+                     from .envs.sokoban.policy import SokobanPolicy
                  except Exception:
-                     _SokobanPolicy = None  # type: ignore
-
-                 if _SokobanPolicy is not None and isinstance(policy, _SokobanPolicy):
+                     sokoban_policy_cls = None  # type: ignore[assignment]
+                 else:
+                     sokoban_policy_cls = SokobanPolicy
+
+                 if formatted is None and sokoban_policy_cls is not None and isinstance(
+                     policy, sokoban_policy_cls
+                 ):
                      from .envs.sokoban.shared import format_observation_sokoban
-
-                     obs_text = format_observation_sokoban(request.observation)
-                 elif True:
+
+                     formatted = format_observation_sokoban(request.observation)
+
+                 # Math formatting
                  try:
-                     from .envs.math.policy import MathPolicy as _MathPolicy
+                     from .envs.math.policy import MathPolicy
                  except Exception:
-                     _MathPolicy = None  # type: ignore
-                 if _MathPolicy is not None and isinstance(policy, _MathPolicy):
-                     # Simple extraction of problem text
+                     math_policy_cls = None  # type: ignore[assignment]
+                 else:
+                     math_policy_cls = MathPolicy
+
+                 if formatted is None and math_policy_cls is not None and isinstance(
+                     policy, math_policy_cls
+                 ):
                      try:
-                         obs_text = str(request.observation.get("problem_text") or request.observation)
+                         formatted = str(
+                             request.observation.get("problem_text") or request.observation
+                         )
                      except Exception:
-                         obs_text = str(request.observation)
-                 else:
-                     obs_text = str(request.observation)
-         else:
-             obs_text = request.observation
+                         formatted = str(request.observation)
+
+                 if formatted is None:
+                     formatted = str(request.observation)
+
+                 obs_text = formatted
+
+         # Merge metadata with raw observation for multimodal policies
+         step_metadata: dict[str, Any] = dict(request.metadata or {})
+         step_metadata["raw_observation"] = request.observation
 
          # Execute policy step to get inference request
          tool_calls, meta = await policy.step(
              observation_text=obs_text,
              state=request.state,
-             metadata=request.metadata,
+             metadata=step_metadata,
          )
+         # Compact tool call summary
+         with contextlib.suppress(Exception):
+             _summary: list[dict[str, Any]] = []
+             _tc = tool_calls or []
+             for _item in (_tc if isinstance(_tc, list) else []):
+                 if isinstance(_item, dict):
+                     _tool = _item.get("tool")
+                     _args = _item.get("args")
+                     _keys = list(_args.keys()) if isinstance(_args, dict) else []
+                     _summary.append({"tool": _tool, "args_keys": _keys})
+             logger.info(
+                 "POLICY_STEP: tool_calls=%d summary=%s",
+                 len(_tc),
+                 _summary,
+             )
 
          # If not dry run, perform inference
          if not request.dry_run and "inference_request" in meta:
@@ -312,13 +357,11 @@
              inf_req = meta["inference_request"]
              msgs = inf_req["messages"]
              model_name = inf_req.get("model") or getattr(policy, "model", None) or ""
-             system_messages: List[str] = []
-             user_messages: List[str] = []
+             system_messages: list[str] = []
+             user_messages: list[str] = []
              if msgs and len(msgs) > 0 and msgs[0]["role"] == "system":
                  sys_text = msgs[0]["content"]
-                 policy_name = (
-                     getattr(policy, "name", "") or type(policy).__name__.lower()
-                 )
+                 policy_name = getattr(policy, "name", "") or type(policy).__name__.lower()
 
                  # Assert environment-specific prompts match the policy
                  if policy_name in ("wordle-react", "wordle"):
@@ -342,7 +385,6 @@
                          raise ValueError(
                              f"PROMPT MISMATCH: Crafter policy {policy_name} received Wordle system prompt: {sys_text[:200]}..."
                          )
-
                  elif policy_name in ("sokoban-react", "sokoban"):
                      if "Sokoban" not in sys_text:
                          raise ValueError(
@@ -363,6 +405,7 @@
 
              # Emit full system/user prompts for observability (no secrets included)
              try:
+
                  def _as_text(content: object) -> str:
                      if isinstance(content, str):
                          return content
@@ -380,40 +423,54 @@
                              return "".join(parts)
                      return str(content)
 
-                 system_messages: list[str] = []
-                 user_messages: list[str] = []
+                 system_prompt_records: list[dict[str, Any]] = []
+                 user_prompt_records: list[dict[str, Any]] = []
                  for message in msgs:
                      role = message.get("role")
-                     content = _as_text(message.get("content"))
+                     raw_content = message.get("content")
+                     content = _as_text(raw_content)
+                     record = {"role": role, "text": content, "content": raw_content}
                      if role == "system":
-                         system_messages.append(content)
+                         system_prompt_records.append(record)
                      elif role == "user":
-                         user_messages.append(content)
+                         user_prompt_records.append(record)
 
-                 if system_messages:
+                 logger.info(
+                     "PROMPTS: system_msgs=%d user_msgs=%d last_user_chars=%d",
+                     len(system_prompt_records),
+                     len(user_prompt_records),
+                     len(user_prompt_records[-1].get("text", "")) if user_prompt_records else 0,
+                 )
+
+                 if system_prompt_records:
                      logger.info("PROMPT_DUMP_SYSTEM_BEGIN")
-                     for idx, smsg in enumerate(system_messages):
+                     for idx, rec in enumerate(system_prompt_records):
+                         smsg = rec.get("text", "")
                          logger.info(f"SYSTEM[{idx}]\n{smsg}")
                      logger.info("PROMPT_DUMP_SYSTEM_END")
 
-                 if user_messages:
+                 if user_prompt_records:
                      logger.info("PROMPT_DUMP_USER_BEGIN")
-                     for idx, umsg in enumerate(user_messages):
+                     for idx, rec in enumerate(user_prompt_records):
+                         umsg = rec.get("text", "")
                          logger.info(f"USER[{idx}]\n{umsg}")
                      logger.info("PROMPT_DUMP_USER_END")
                  # Print concise preview for visibility in standard logs
-                 try:
-                     last_user = user_messages[-1] if user_messages else ""
-                     #preview = last_user[:400] if isinstance(last_user, str) else str(last_user)[:400]
+                 with contextlib.suppress(Exception):
+                     last_user = (
+                         user_prompt_records[-1].get("text", "")
+                         if user_prompt_records
+                         else ""
+                     )
                      print(f"[task:crafter] user prompt: {last_user}", flush=True)
-                 except Exception:
-                     pass
              except Exception as e:
                  logger.warning(f"PROMPT_DUMP_FAILED: {e}")
 
              if tracing_context is not None:
                  try:
-                     await tracing_context.record_policy_prompts(system_messages, user_messages)
+                     await tracing_context.record_policy_prompts(
+                         system_prompt_records, user_prompt_records
+                     )
                  except Exception as exc:
                      logger.debug(f"TRACING_PROMPTS_FAIL: {exc}")
 
@@ -426,25 +483,37 @@
              )
 
              # Ensure meta carries the final target URL for downstream logging/clients
-             try:
+             with contextlib.suppress(Exception):
                  meta["inference_url"] = target_url
-             except Exception:
-                 pass
 
              # Select API key based on resolved target URL
              api_key_override = None
              try:
                  import os as _os
+
                  if isinstance(target_url, str):
                      low_url = target_url.lower()
-                     if "openai.com" in low_url:
-                         api_key_override = _os.getenv("OPENAI_API_KEY") or getattr(task_app, "openai_api_key", None)
-                     elif "groq.com" in low_url:
+                     # Proxy endpoints should not receive a bearer; the server-side proxy holds the vendor key
+                     if "/proxy/groq" in low_url or "/proxy/openai" in low_url:
+                         api_key_override = None
+                     elif "openai.com" in low_url:
+                         api_key_override = _os.getenv("OPENAI_API_KEY") or getattr(
+                             task_app, "openai_api_key", None
+                         )
+                     elif "groq.com" in low_url or "/proxy/groq" in low_url:
                          api_key_override = _os.getenv("GROQ_API_KEY")
                      else:
-                         api_key_override = _os.getenv("SYNTH_API_KEY") or _os.getenv("OPENAI_API_KEY") or getattr(task_app, "openai_api_key", None)
+                         api_key_override = (
+                             _os.getenv("SYNTH_API_KEY")
+                             or _os.getenv("OPENAI_API_KEY")
+                             or getattr(task_app, "openai_api_key", None)
+                         )
                  else:
-                     api_key_override = _os.getenv("SYNTH_API_KEY") or _os.getenv("OPENAI_API_KEY") or getattr(task_app, "openai_api_key", None)
+                     api_key_override = (
+                         _os.getenv("SYNTH_API_KEY")
+                         or _os.getenv("OPENAI_API_KEY")
+                         or getattr(task_app, "openai_api_key", None)
+                     )
              except Exception:
                  api_key_override = None
 
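The branch above encodes a precedence rule for the outbound bearer token: interposed /proxy/ endpoints get none (the server-side proxy injects the vendor key), openai.com maps to OPENAI_API_KEY, groq.com to GROQ_API_KEY, and everything else falls back through SYNTH_API_KEY then OPENAI_API_KEY. The same rule as a standalone sketch (the diff additionally falls back to a task_app.openai_api_key attribute, omitted here for brevity):

    import os

    def select_api_key(target_url: str | None) -> str | None:
        low = (target_url or "").lower()
        if "/proxy/groq" in low or "/proxy/openai" in low:
            return None  # proxy holds the vendor key server-side
        if "openai.com" in low:
            return os.getenv("OPENAI_API_KEY")
        if "groq.com" in low:
            return os.getenv("GROQ_API_KEY")
        return os.getenv("SYNTH_API_KEY") or os.getenv("OPENAI_API_KEY")
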
@@ -455,7 +524,9 @@
              masked = "<masked>"
              logger.debug(f"INFERENCE_AUTH: Using bearer key {masked}")
          else:
-             logger.warning("INFERENCE_AUTH: No API key resolved for inference request; downstream may 401")
+             logger.warning(
+                 "INFERENCE_AUTH: No API key resolved for inference request; downstream may 401"
+             )
 
          client = create_inference_client(task_app, api_key=api_key_override)
 
@@ -544,16 +615,16 @@
              except Exception:
                  return max(1, int(len(text) / 4))
 
-         def _count_messages_tokens(messages: List[Dict[str, Any]]) -> int:
+         def _count_messages_tokens(messages: list[dict[str, Any]]) -> int:
              total = 0
              for m in messages:
                  total += _count_tokens(_content_to_text(m.get("content")))
              return total
 
          def _truncate_messages_to_budget(
-             messages: List[Dict[str, Any]],
+             messages: list[dict[str, Any]],
              max_tokens: int,
-         ) -> Tuple[List[Dict[str, Any]], int, int, int]:
+         ) -> tuple[list[dict[str, Any]], int, int, int]:
              before = _count_messages_tokens(messages)
              if before <= max_tokens:
                  return messages, before, before, len(messages)
@@ -563,7 +634,7 @@
              if messages and messages[0].get("role") == "system":
                  system_msg = messages[0]
                  start_idx = 1
-             kept_rev: List[Dict[str, Any]] = []
+             kept_rev: list[dict[str, Any]] = []
              total = _count_messages_tokens([system_msg] if system_msg else [])
              # Walk from the end keeping most recent messages
              for m in reversed(messages[start_idx:]):
@@ -604,7 +675,7 @@
              )
              if new_msgs is not msgs:
                  inf_req["messages"] = new_msgs
-                 try:
+                 with contextlib.suppress(Exception):
                      logger.info(
                          {
                              "chat_truncated": True,
@@ -614,8 +685,6 @@
                              "kept_msgs": int(kept_count),
                          }
                      )
-                 except Exception:
-                     pass
          except Exception as _trunc_e:
              logger.warning(f"CHAT_TRUNCATION_FAILED: {type(_trunc_e).__name__}: {_trunc_e}")
 
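_truncate_messages_to_budget keeps the system message (if present) plus as many of the most recent messages as fit, walking the history from the end. A condensed sketch that returns only the kept messages (the diff's version also returns before/after token counts and the kept-message count), using the same len/4 fallback estimate as _count_tokens:

    from typing import Any

    def _cost(m: dict[str, Any]) -> int:
        # Crude len/4 estimate, mirroring the diff's fallback path.
        return max(1, len(str(m.get("content", ""))) // 4)

    def truncate_to_budget(messages: list[dict[str, Any]], max_tokens: int) -> list[dict[str, Any]]:
        if sum(_cost(m) for m in messages) <= max_tokens:
            return messages
        system = [messages[0]] if messages and messages[0].get("role") == "system" else []
        used = sum(_cost(m) for m in system)
        kept: list[dict[str, Any]] = []
        for m in reversed(messages[len(system):]):  # newest first
            if used + _cost(m) > max_tokens:
                break
            kept.append(m)
            used += _cost(m)
        return system + list(reversed(kept))
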
@@ -643,76 +712,78 @@
          # Prompt diagnostics before sending to inference: build chat template locally,
          # count tokens, and log the first 10k tokens if oversized. Also stash a
          # compact preview in meta so the trainer can surface it.
-         try:
+         with contextlib.suppress(Exception):
              req_for_diag = meta.get("inference_request", {})
              model_for_diag = req_for_diag.get("model") or getattr(policy, "model", None) or ""
              messages_for_diag = req_for_diag.get("messages") or []
              if model_for_diag and messages_for_diag:
-                 try:
-                     from transformers import AutoTokenizer
-                     tok = AutoTokenizer.from_pretrained(model_for_diag)
-                     prompt_preview = tok.apply_chat_template(
-                         messages_for_diag,
-                         add_generation_prompt=True,
-                         tokenize=False,
+                 from transformers import AutoTokenizer
+
+                 tok = AutoTokenizer.from_pretrained(model_for_diag)
+                 prompt_preview = tok.apply_chat_template(
+                     messages_for_diag,
+                     add_generation_prompt=True,
+                     tokenize=False,
+                 )
+                 ids = tok.encode(prompt_preview, add_special_tokens=False)
+                 max_len = getattr(tok, "model_max_length", None)
+                 over_limit = False
+                 with contextlib.suppress(Exception):
+                     over_limit = (
+                         isinstance(max_len, int) and max_len > 0 and len(ids) > int(max_len)
                      )
-                     ids = tok.encode(prompt_preview, add_special_tokens=False)
-                     max_len = getattr(tok, "model_max_length", None)
-                     over_limit = False
-                     try:
-                         over_limit = isinstance(max_len, int) and max_len > 0 and len(ids) > int(max_len)
-                     except Exception:
-                         over_limit = False
-                     if over_limit or len(ids) > 10000:
-                         preview_ids = ids[:10000]
-                         preview_text = tok.decode(preview_ids, skip_special_tokens=False)
-                         try:
-                             logger.warning(
-                                 {
-                                     "prompt_token_overflow_local": True,
-                                     "model": str(model_for_diag),
-                                     "token_count": int(len(ids)),
-                                     "model_max_length": int(max_len) if isinstance(max_len, int) else None,
-                                     "preview_tokens_logged": int(len(preview_ids)),
-                                     "prompt_preview_first_10k_tokens": preview_text,
-                                 }
-                             )
-                         except Exception:
-                             pass
-                         try:
-                             meta["prompt_debug"] = {
+                 if over_limit or len(ids) > 10000:
+                     preview_ids = ids[:10000]
+                     preview_text = tok.decode(
+                         preview_ids,
+                         skip_special_tokens=False,
+                     )
+                     with contextlib.suppress(Exception):
+                         logger.warning(
+                             {
+                                 "prompt_token_overflow_local": True,
+                                 "model": str(model_for_diag),
                                  "token_count": int(len(ids)),
-                                 "model_max_length": int(max_len) if isinstance(max_len, int) else None,
-                                 "preview_first_10k_tokens": preview_text,
+                                 "model_max_length": int(max_len)
+                                 if isinstance(max_len, int)
+                                 else None,
+                                 "preview_tokens_logged": int(len(preview_ids)),
+                                 "prompt_preview_first_10k_tokens": preview_text,
                              }
-                         except Exception:
-                             pass
-                 except Exception:
-                     pass
-         except Exception:
-             pass
+                         )
+                     with contextlib.suppress(Exception):
+                         meta["prompt_debug"] = {
+                             "token_count": int(len(ids)),
+                             "model_max_length": int(max_len)
+                             if isinstance(max_len, int)
+                             else None,
+                             "preview_first_10k_tokens": preview_text,
+                         }
 
          # Emit the exact prompt/messages and tools before calling the LLM (bounded preview)
-         try:
+         with contextlib.suppress(Exception):
              req_dump = meta.get("inference_request", {})
              msgs = req_dump.get("messages")
              tools_dump = req_dump.get("tools")
              if isinstance(msgs, list):
                  # Print compact messages structure and tool schema with bounded length
                  import json as _json
+
                  msgs_compact = _json.dumps(msgs)[:20000]
-                 tools_compact = _json.dumps(tools_dump)[:8000] if tools_dump is not None else None
-                 print({
-                     "llm.call": True,
-                     "policy": str(policy_name),
-                     "messages_preview": msgs_compact,
-                     "tools_preview": tools_compact,
-                 })
-         except Exception:
-             pass
+                 tools_compact = (
+                     _json.dumps(tools_dump)[:8000] if tools_dump is not None else None
+                 )
+                 print(
+                     {
+                         "llm.call": True,
+                         "policy": str(policy_name),
+                         "messages_preview": msgs_compact,
+                         "tools_preview": tools_compact,
+                     }
+                 )
 
          # Normalize request for non-OpenAI endpoints (strict schemas)
-         try:
+         with contextlib.suppress(Exception):
              base = str(target_url or "")
              is_openai_dotcom = "openai.com" in base.lower()
              if not is_openai_dotcom:
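
The diagnostics block renders the chat template locally with the model's own tokenizer so the prompt can be measured (and previewed) before the request leaves the task app. A minimal standalone version, assuming the transformers package is installed and model_name resolves to a tokenizer on the Hugging Face hub:

    from transformers import AutoTokenizer

    def count_prompt_tokens(model_name: str, messages: list[dict]) -> int:
        tok = AutoTokenizer.from_pretrained(model_name)
        # Render the exact prompt string the server would build, then tokenize it.
        rendered = tok.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)
        return len(tok.encode(rendered, add_special_tokens=False))
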
@@ -721,20 +792,25 @@
                  # Force structured tool_choice if a bare "required" is present
                  if req_body.get("tool_choice") == "required":
                      func_name = "interact_many"
-                     try:
+                     with contextlib.suppress(Exception):
                          tools_arr = req_body.get("tools") or []
                          if isinstance(tools_arr, list) and tools_arr:
-                             f = tools_arr[0].get("function") if isinstance(tools_arr[0], dict) else None
+                             f = (
+                                 tools_arr[0].get("function")
+                                 if isinstance(tools_arr[0], dict)
+                                 else None
+                             )
                              cand = (f or {}).get("name") if isinstance(f, dict) else None
                              if isinstance(cand, str) and cand:
                                  func_name = cand
-                     except Exception:
-                         pass
-                     req_body["tool_choice"] = {"type": "function", "function": {"name": func_name}}
+                     req_body["tool_choice"] = {
+                         "type": "function",
+                         "function": {"name": func_name},
+                     }
                      req_body["parallel_tool_calls"] = False
                      req_body.setdefault("function_call", {"name": func_name})
                  # Inject extra_body for thinking controls expected by Modal service
-                 try:
+                 with contextlib.suppress(Exception):
                      tb = req_body.get("thinking_budget")
                      tm = str(req_body.get("thinking_mode") or "").lower()
                      enable_thinking = bool(tb) or tm == "think"
@@ -742,25 +818,52 @@
                      chat_kwargs = dict(extra.get("chat_template_kwargs") or {})
                      if enable_thinking:
                          chat_kwargs["enable_thinking"] = True
-                     if isinstance(tb, (int, float, str)) and str(tb).strip():
-                         try:
+                     if isinstance(tb, int | float | str) and str(tb).strip():
+                         with contextlib.suppress(Exception):
                              chat_kwargs["thinking_budget"] = int(tb)
-                         except Exception:
-                             pass
                      if chat_kwargs:
                          extra["chat_template_kwargs"] = chat_kwargs
                      # Ensure stop_after_tool_calls honored via extra_body for stricter servers
                      extra.setdefault("stop_after_tool_calls", 1)
                      if extra:
                          req_body["extra_body"] = extra
-                 except Exception:
-                     pass
                  # Provide a conservative default temperature if missing
                  if "temperature" not in req_body:
                      req_body["temperature"] = 0.1
                  meta["inference_request"] = req_body
-         except Exception:
-             pass
+
+             # Strip image parts: Crafter policy currently only uses text prompts.
+             # Some providers reject image_url payloads entirely, so always flatten to plain text.
+             req_body2 = meta.get("inference_request", {})
+             if isinstance(req_body2, dict):
+                 msgs = req_body2.get("messages")
+                 if isinstance(msgs, list):
+                     new_msgs = []
+                     changed = False
+                     for m in msgs:
+                         try:
+                             if isinstance(m, dict):
+                                 content = m.get("content")
+                                 if isinstance(content, list):
+                                     parts: list[str] = []
+                                     for seg in content:
+                                         if isinstance(seg, dict):
+                                             txt = seg.get("text") or seg.get("content")
+                                             if isinstance(txt, str) and txt:
+                                                 parts.append(txt)
+                                     m2 = dict(m)
+                                     m2["content"] = "\n".join(parts)
+                                     new_msgs.append(m2)
+                                     changed = True
+                                 else:
+                                     new_msgs.append(m)
+                             else:
+                                 new_msgs.append(m)
+                         except Exception:
+                             new_msgs.append(m)
+                     if changed:
+                         req_body2["messages"] = new_msgs
+                         meta["inference_request"] = req_body2
 
          _t_start = _t.time()
          call_started_at = datetime.utcnow()
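
The new strip-image pass flattens OpenAI-style list content (text and image_url parts) into a single newline-joined string, because the Crafter policy is text-only and some providers reject image payloads outright. The same transformation isolated as a helper (flatten_content is illustrative, not a name from the diff):

    from typing import Any

    def flatten_content(message: dict[str, Any]) -> dict[str, Any]:
        content = message.get("content")
        if not isinstance(content, list):
            return message  # already plain text
        parts = [
            seg.get("text") or seg.get("content")
            for seg in content
            if isinstance(seg, dict)
        ]
        out = dict(message)
        out["content"] = "\n".join(p for p in parts if isinstance(p, str) and p)
        return out
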
@@ -799,10 +902,13 @@
              else:
                  try:
                      import json as _json
-                     print({
-                         "tool_calls_parsed": int(len(tool_calls)),
-                         "tool_calls_preview": _json.dumps(tool_calls)[:20000],
-                     })
+
+                     print(
+                         {
+                             "tool_calls_parsed": int(len(tool_calls)),
+                             "tool_calls_preview": _json.dumps(tool_calls)[:20000],
+                         }
+                     )
                  except Exception:
                      logger.info(f"Parsed {len(tool_calls)} tool calls: {tool_calls}")
 
@@ -814,21 +920,17 @@
                      inference_response, getattr(policy, "use_tools", True)
                  )
              else:
-                 parsed = policy.parse_model_response(
-                     inference_response, request.observation
-                 )
+                 parsed = policy.parse_model_response(inference_response, request.observation)
                  # Replace tool_calls with parsed result
                  if isinstance(parsed, list):
                      tool_calls = parsed
-             try:
+             with contextlib.suppress(Exception):
                  logger.info(
                      "TOOLCALL_PARSE: parsed=%d has_tools=%s example=%r",
                      len(tool_calls) if isinstance(tool_calls, list) else -1,
                      bool(getattr(policy, "use_tools", True)),
                      (tool_calls[0] if isinstance(tool_calls, list) and tool_calls else None),
                  )
-             except Exception:
-                 pass
          except Exception as _pe:
              logger.warning(f"Failed to parse tool calls: {str(_pe)}")
          # Attach raw response + usage for observability
@@ -858,7 +960,7 @@
 
      except Exception as e:
          logger.error(f"Failed to step policy {request.policy_id}: {e}")
-         raise HTTPException(status_code=500, detail=str(e))
+         raise HTTPException(status_code=500, detail=str(e)) from e
 
 
  @router.post("/snapshot", response_model=PolicySnapshotResponse)
@@ -866,9 +968,7 @@ async def snapshot_policy(request: PolicySnapshotRequest) -> PolicySnapshotRespo
      """Create a snapshot of the policy state."""
      handle = registry.get_policy(request.policy_id)
      if not handle:
-         raise HTTPException(
-             status_code=404, detail=f"Policy {request.policy_id} not found"
-         )
+         raise HTTPException(status_code=404, detail=f"Policy {request.policy_id} not found")
 
      try:
          # Serialize policy state
@@ -898,7 +998,7 @@ async def snapshot_policy(request: PolicySnapshotRequest) -> PolicySnapshotRespo
 
      except Exception as e:
          logger.error(f"Failed to snapshot policy {request.policy_id}: {e}")
-         raise HTTPException(status_code=500, detail=str(e))
+         raise HTTPException(status_code=500, detail=str(e)) from e
 
 
  @router.post("/restore", response_model=PolicyRestoreResponse)
@@ -906,9 +1006,7 @@ async def restore_policy(request: PolicyRestoreRequest) -> PolicyRestoreResponse
      """Restore a policy from a snapshot."""
      snapshot = registry.get_snapshot(request.snapshot_id)
      if not snapshot:
-         raise HTTPException(
-             status_code=404, detail=f"Snapshot {request.snapshot_id} not found"
-         )
+         raise HTTPException(status_code=404, detail=f"Snapshot {request.snapshot_id} not found")
 
      if snapshot.kind != "policy":
          raise HTTPException(
@@ -931,16 +1029,20 @@ async def restore_policy(request: PolicyRestoreRequest) -> PolicyRestoreResponse
              policy = await CrafterPolicy.deserialize(state_dict)
          elif low in ["wordle-react", "wordle"]:
              try:
-                 from .envs.wordle.policy import WordlePolicy as _WordlePolicy
+                 from .envs.wordle.policy import WordlePolicy
              except Exception as e:
-                 raise HTTPException(status_code=500, detail=f"Wordle policy unavailable: {e}")
-             policy = await _WordlePolicy.deserialize(state_dict)
+                 raise HTTPException(
+                     status_code=500, detail=f"Wordle policy unavailable: {e}"
+                 ) from e
+             policy = await WordlePolicy.deserialize(state_dict)
          elif low in ["sokoban-react", "sokoban"]:
              try:
-                 from .envs.sokoban.policy import SokobanPolicy as _SokobanPolicy
+                 from .envs.sokoban.policy import SokobanPolicy
              except Exception as e:
-                 raise HTTPException(status_code=500, detail=f"Sokoban policy unavailable: {e}")
-             policy = await _SokobanPolicy.deserialize(state_dict)
+                 raise HTTPException(
+                     status_code=500, detail=f"Sokoban policy unavailable: {e}"
+                 ) from e
+             policy = await SokobanPolicy.deserialize(state_dict)
          else:
              raise HTTPException(
                  status_code=422,
@@ -956,10 +1058,8 @@ async def restore_policy(request: PolicyRestoreRequest) -> PolicyRestoreResponse
          return PolicyRestoreResponse(policy_id=policy_id)
 
      except Exception as e:
-         logger.error(
-             f"Failed to restore policy from snapshot {request.snapshot_id}: {e}"
-         )
-         raise HTTPException(status_code=500, detail=str(e))
+         logger.error(f"Failed to restore policy from snapshot {request.snapshot_id}: {e}")
+         raise HTTPException(status_code=500, detail=str(e)) from e
 
 
  @router.post("/terminate", response_model=PolicyTerminateResponse)
@@ -967,9 +1067,7 @@ async def terminate_policy(request: PolicyTerminateRequest) -> PolicyTerminateRe
      """Terminate a policy and clean up resources."""
      handle = registry.get_policy(request.policy_id)
      if not handle:
-         raise HTTPException(
-             status_code=404, detail=f"Policy {request.policy_id} not found"
-         )
+         raise HTTPException(status_code=404, detail=f"Policy {request.policy_id} not found")
 
      try:
          # Call terminate on the policy
@@ -982,4 +1080,4 @@
 
      except Exception as e:
          logger.error(f"Failed to terminate policy {request.policy_id}: {e}")
-         raise HTTPException(status_code=500, detail=str(e))
+         raise HTTPException(status_code=500, detail=str(e)) from e
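
The most repeated change in this file is appending from e when re-raising as HTTPException, which preserves the original exception as __cause__ instead of discarding it (the behavior Ruff's B904 rule asks for). A toy handler showing the pattern (registry and the lookup are stand-ins, not code from the diff):

    from fastapi import HTTPException

    def snapshot(registry: dict, policy_id: str) -> dict:
        try:
            return registry[policy_id]  # may raise KeyError
        except Exception as e:
            # "from e" chains the original error, so logs show the real
            # root cause instead of a bare HTTPException.
            raise HTTPException(status_code=500, detail=str(e)) from e
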