synth-ai 0.2.8.dev4__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/README.md +1 -0
- examples/__init__.py +16 -0
- examples/analyze_semantic_words.sh +17 -0
- examples/baseline/banking77_baseline.py +243 -0
- examples/baseline/banking77_pipeline_baseline.py +294 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
- examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/mipro/README.md +415 -0
- examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
- examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
- examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
- examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/mipro/multi_step.md +79 -0
- examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
- examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/crafter_debug_render.py +186 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
- examples/gepa/banking77_pipeline_gepa.toml +96 -0
- examples/gepa/multi_stage_gepa_example.toml +84 -0
- examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +147 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/crafter_rl_lora.md +70 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/multi_step/sse_metrics_streaming_notes.md +357 -0
- examples/multi_step/task_app_config_notes.md +494 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_small.toml +57 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +65 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +19 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +39 -0
- examples/qwen_coder/todos.md +38 -0
- examples/qwen_coder/validate_jsonl.py +60 -0
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +169 -0
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/download_dataset.py +80 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +10 -0
- examples/sdk_prompt_learning_example.py +55 -0
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +120 -0
- examples/sft/generate_traces.py +164 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +135 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +604 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +584 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1094 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1905 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +136 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +912 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/banking77_pipeline/__init__.py +6 -0
- examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
- examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/README.md +42 -0
- examples/task_apps/crafter/task_app/__init__.py +5 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +2 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +4 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +4 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +4 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +21 -0
- examples/task_apps/math/math_single_step.py +1000 -0
- examples/task_apps/math/math_task_app.py +115 -0
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README.md +356 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
- examples/task_apps/pokemon_red/task_app.py +1048 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
- examples/task_apps/sokoban/README.md +306 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +4 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +4 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +4 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +4 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/tunnel_gepa_banking77/README.md +106 -0
- examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
- examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
- examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
- examples/vlm/PROPOSAL.md +53 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +275 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +422 -0
- examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
- examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
- examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
- examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
- examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
- examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
- examples/warming_up_to_rl/export_trace_sft.py +837 -0
- examples/warming_up_to_rl/groq_test.py +97 -0
- examples/warming_up_to_rl/manage_secrets.py +131 -0
- examples/warming_up_to_rl/old/event_rewards.md +234 -0
- examples/warming_up_to_rl/old/notes.md +73 -0
- examples/warming_up_to_rl/readme.md +110 -0
- examples/warming_up_to_rl/run_eval.py +736 -0
- examples/warming_up_to_rl/run_fft_and_save.py +380 -0
- examples/warming_up_to_rl/run_local_rollout.py +239 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
- examples/warming_up_to_rl/run_rl_and_save.py +124 -0
- examples/warming_up_to_rl/run_rollout_remote.py +156 -0
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
- examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
- examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- examples/workflows/math_rl/run_eval.py +436 -0
- examples/workflows/math_rl/run_rl_and_save.py +111 -0
- synth_ai/__init__.py +47 -23
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +514 -0
- synth_ai/api/train/__init__.py +63 -0
- synth_ai/api/train/builders.py +473 -0
- synth_ai/api/train/cli.py +1185 -0
- synth_ai/api/train/config_finder.py +246 -0
- synth_ai/api/train/configs/__init__.py +65 -0
- synth_ai/api/train/configs/prompt_learning.py +496 -0
- synth_ai/api/train/configs/rl.py +188 -0
- synth_ai/api/train/configs/sft.py +99 -0
- synth_ai/api/train/configs/shared.py +81 -0
- synth_ai/api/train/env_resolver.py +352 -0
- synth_ai/api/train/pollers.py +91 -0
- synth_ai/api/train/prompt_learning.py +425 -0
- synth_ai/api/train/sft.py +390 -0
- synth_ai/api/train/supported_algos.py +147 -0
- synth_ai/api/train/task_app.py +195 -0
- synth_ai/api/train/utils.py +244 -0
- synth_ai/api/train/validators.py +1117 -0
- synth_ai/api/tunnel.py +49 -0
- synth_ai/auth/credentials.py +94 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cfgs.py +227 -0
- synth_ai/cli/__init__.py +90 -45
- synth_ai/cli/_modal_wrapper.py +31 -0
- synth_ai/cli/_storage.py +20 -0
- synth_ai/cli/_typer_patch.py +47 -0
- synth_ai/cli/_validate_task_app.py +29 -0
- synth_ai/cli/balance.py +16 -4
- synth_ai/cli/calc.py +36 -21
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +267 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +185 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1437 -0
- synth_ai/cli/commands/status/__init__.py +66 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/session.py +183 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +32 -140
- synth_ai/cli/deploy.py +233 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +28 -22
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/mcp.py +34 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +256 -0
- synth_ai/cli/recent.py +13 -7
- synth_ai/cli/rl_demo.py +166 -114
- synth_ai/cli/root.py +143 -112
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +49 -0
- synth_ai/cli/status.py +7 -125
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +3134 -0
- synth_ai/cli/traces.py +9 -5
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +13 -18
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/core/cli.py +745 -416
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/__init__.py +7 -1
- synth_ai/demos/demo_task_apps/core.py +75 -37
- synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
- synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/config.toml +55 -110
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +491 -166
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +37 -0
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +703 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +12 -5
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/environment.py +93 -2
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +60 -12
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +86 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +104 -12
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/__init__.py +15 -0
- synth_ai/evals/base.py +14 -5
- synth_ai/evals/client.py +82 -0
- synth_ai/evals/types.py +42 -0
- synth_ai/http.py +8 -22
- synth_ai/http_client.py +45 -12
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +21 -7
- synth_ai/jobs/client.py +129 -80
- synth_ai/judge_schemas.py +127 -0
- synth_ai/learning/__init__.py +51 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +122 -30
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +14 -8
- synth_ai/learning/jobs.py +43 -47
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +185 -0
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +269 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +698 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +29 -25
- synth_ai/mcp/__init__.py +5 -0
- synth_ai/mcp/__main__.py +8 -0
- synth_ai/mcp/main.py +254 -0
- synth_ai/mcp/setup.py +100 -0
- synth_ai/modal.py +257 -0
- synth_ai/pricing/__init__.py +3 -0
- synth_ai/pricing/model_pricing.py +64 -0
- synth_ai/session/__init__.py +75 -0
- synth_ai/session/client.py +383 -0
- synth_ai/session/constants.py +63 -0
- synth_ai/session/exceptions.py +105 -0
- synth_ai/session/manager.py +139 -0
- synth_ai/session/models.py +89 -0
- synth_ai/session/query.py +110 -0
- synth_ai/spec/__init__.py +46 -0
- synth_ai/spec/dataclasses.py +149 -0
- synth_ai/spec/loader.py +144 -0
- synth_ai/spec/serializer.py +199 -0
- synth_ai/spec/validation.py +250 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +589 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/__init__.py +116 -3
- synth_ai/task/apps/__init__.py +132 -0
- synth_ai/task/auth.py +165 -0
- synth_ai/task/client.py +167 -0
- synth_ai/task/config.py +261 -0
- synth_ai/task/contracts.py +173 -57
- synth_ai/task/datasets.py +108 -0
- synth_ai/task/errors.py +50 -0
- synth_ai/task/health.py +17 -11
- synth_ai/task/inference_api.py +101 -0
- synth_ai/task/json.py +111 -0
- synth_ai/task/proxy.py +251 -0
- synth_ai/task/rubrics/__init__.py +55 -0
- synth_ai/task/rubrics/loaders.py +156 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +116 -0
- synth_ai/task/rubrics/strict.py +149 -0
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/server.py +432 -0
- synth_ai/task/trace_correlation_helpers.py +328 -0
- synth_ai/task/tracing_utils.py +95 -0
- synth_ai/task/validators.py +449 -6
- synth_ai/task/vendors.py +59 -0
- synth_ai/tracing_v3/__init__.py +4 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/config.py +167 -22
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +42 -29
- synth_ai/tracing_v3/decorators.py +80 -45
- synth_ai/tracing_v3/examples/basic_usage.py +15 -9
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/replica_sync.py +12 -7
- synth_ai/tracing_v3/serialization.py +130 -0
- synth_ai/tracing_v3/session_tracer.py +86 -21
- synth_ai/tracing_v3/storage/base.py +98 -12
- synth_ai/tracing_v3/storage/config.py +63 -16
- synth_ai/tracing_v3/storage/factory.py +11 -9
- synth_ai/tracing_v3/storage/utils.py +15 -11
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/__init__.py +8 -21
- synth_ai/tracing_v3/turso/daemon.py +123 -15
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1293 -0
- synth_ai/tracing_v3/utils.py +5 -4
- synth_ai/tunnel.py +143 -0
- synth_ai/tunnel_deploy.py +278 -0
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +166 -0
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/apps.py +152 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/claude.py +36 -0
- synth_ai/utils/cli.py +284 -0
- synth_ai/utils/config.py +81 -0
- synth_ai/utils/env.py +346 -0
- synth_ai/utils/errors.py +85 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/log_filter.py +99 -0
- synth_ai/utils/logging.py +198 -0
- synth_ai/utils/modal.py +299 -0
- synth_ai/utils/paths.py +95 -0
- synth_ai/utils/process.py +233 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/ssl.py +25 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/tunnel/__init__.py +12 -0
- synth_ai/utils/tunnel/config.py +55 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/uvicorn.py +77 -0
- synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
- synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
- synth_ai/cli/man.py +0 -106
- synth_ai/core/experiment.py +0 -15
- synth_ai/core/system.py +0 -15
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/handshake.py +0 -63
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/lm/__init__.py +0 -51
- synth_ai/lm/caching/constants.py +0 -6
- synth_ai/lm/caching/dbs.py +0 -0
- synth_ai/lm/caching/ephemeral.py +0 -102
- synth_ai/lm/caching/handler.py +0 -137
- synth_ai/lm/caching/initialize.py +0 -11
- synth_ai/lm/caching/persistent.py +0 -114
- synth_ai/lm/config.py +0 -110
- synth_ai/lm/constants.py +0 -32
- synth_ai/lm/core/__init__.py +0 -8
- synth_ai/lm/core/all.py +0 -73
- synth_ai/lm/core/exceptions.py +0 -7
- synth_ai/lm/core/main.py +0 -319
- synth_ai/lm/core/main_v3.py +0 -594
- synth_ai/lm/core/synth_models.py +0 -48
- synth_ai/lm/core/vendor_clients.py +0 -188
- synth_ai/lm/cost/monitor.py +0 -1
- synth_ai/lm/cost/statefulness.py +0 -1
- synth_ai/lm/injection.py +0 -80
- synth_ai/lm/overrides.py +0 -206
- synth_ai/lm/provider_support/__init__.py +0 -8
- synth_ai/lm/provider_support/anthropic.py +0 -972
- synth_ai/lm/provider_support/openai.py +0 -1139
- synth_ai/lm/provider_support/suppress_logging.py +0 -31
- synth_ai/lm/structured_outputs/handler.py +0 -440
- synth_ai/lm/structured_outputs/inject.py +0 -297
- synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
- synth_ai/lm/tools/__init__.py +0 -3
- synth_ai/lm/tools/base.py +0 -172
- synth_ai/lm/unified_interface.py +0 -202
- synth_ai/lm/vendors/base.py +0 -81
- synth_ai/lm/vendors/core/anthropic_api.py +0 -387
- synth_ai/lm/vendors/core/gemini_api.py +0 -292
- synth_ai/lm/vendors/core/mistral_api.py +0 -322
- synth_ai/lm/vendors/core/openai_api.py +0 -225
- synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
- synth_ai/lm/vendors/local/ollama.py +0 -0
- synth_ai/lm/vendors/openai_standard.py +0 -780
- synth_ai/lm/vendors/openai_standard_responses.py +0 -256
- synth_ai/lm/vendors/retries.py +0 -22
- synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
- synth_ai/lm/vendors/supported/deepseek.py +0 -69
- synth_ai/lm/vendors/supported/grok.py +0 -75
- synth_ai/lm/vendors/supported/groq.py +0 -16
- synth_ai/lm/vendors/supported/ollama.py +0 -15
- synth_ai/lm/vendors/supported/openrouter.py +0 -74
- synth_ai/lm/vendors/supported/together.py +0 -11
- synth_ai/lm/vendors/synth_client.py +0 -808
- synth_ai/lm/warmup.py +0 -186
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/manager.py +0 -760
- synth_ai/v0/tracing/abstractions.py +0 -224
- synth_ai/v0/tracing/base_client.py +0 -91
- synth_ai/v0/tracing/client_manager.py +0 -131
- synth_ai/v0/tracing/config.py +0 -142
- synth_ai/v0/tracing/context.py +0 -146
- synth_ai/v0/tracing/decorators.py +0 -682
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/v0/tracing/events/manage.py +0 -147
- synth_ai/v0/tracing/events/scope.py +0 -86
- synth_ai/v0/tracing/events/store.py +0 -228
- synth_ai/v0/tracing/immediate_client.py +0 -151
- synth_ai/v0/tracing/local.py +0 -18
- synth_ai/v0/tracing/log_client_base.py +0 -73
- synth_ai/v0/tracing/retry_queue.py +0 -186
- synth_ai/v0/tracing/trackers.py +0 -515
- synth_ai/v0/tracing/upload.py +0 -512
- synth_ai/v0/tracing/utils.py +0 -9
- synth_ai/v0/tracing_v1/__init__.py +0 -16
- synth_ai/v0/tracing_v1/abstractions.py +0 -224
- synth_ai/v0/tracing_v1/base_client.py +0 -91
- synth_ai/v0/tracing_v1/client_manager.py +0 -131
- synth_ai/v0/tracing_v1/config.py +0 -142
- synth_ai/v0/tracing_v1/context.py +0 -146
- synth_ai/v0/tracing_v1/decorators.py +0 -703
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/v0/tracing_v1/events/manage.py +0 -147
- synth_ai/v0/tracing_v1/events/scope.py +0 -86
- synth_ai/v0/tracing_v1/events/store.py +0 -228
- synth_ai/v0/tracing_v1/immediate_client.py +0 -151
- synth_ai/v0/tracing_v1/local.py +0 -18
- synth_ai/v0/tracing_v1/log_client_base.py +0 -73
- synth_ai/v0/tracing_v1/retry_queue.py +0 -186
- synth_ai/v0/tracing_v1/trackers.py +0 -515
- synth_ai/v0/tracing_v1/upload.py +0 -527
- synth_ai/v0/tracing_v1/utils.py +0 -9
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.8.dev4.dist-info/METADATA +0 -129
- synth_ai-0.2.8.dev4.dist-info/RECORD +0 -420
- {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
- {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
- {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
- {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
- {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
- {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
- {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
- {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
- /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
- /synth_ai/{learning/filtering.py → py.typed} +0 -0
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,999 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
import contextlib
|
|
5
|
+
import json
|
|
6
|
+
import logging
|
|
7
|
+
import os
|
|
8
|
+
import time
|
|
9
|
+
from typing import Any
|
|
10
|
+
from urllib.parse import urlparse, urlunparse
|
|
11
|
+
|
|
12
|
+
import click
|
|
13
|
+
import httpx
|
|
14
|
+
|
|
15
|
+
logger = logging.getLogger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class OpenAIClient:
|
|
19
|
+
"""Async HTTP client for OpenAI-compatible inference servers (vLLM)."""
|
|
20
|
+
|
|
21
|
+
def __init__(
|
|
22
|
+
self,
|
|
23
|
+
base_url: str,
|
|
24
|
+
api_key: str | None = None,
|
|
25
|
+
timeout_s: float = 120.0,
|
|
26
|
+
) -> None:
|
|
27
|
+
self.base_url = base_url.rstrip("/")
|
|
28
|
+
self.api_key = api_key
|
|
29
|
+
self.timeout_s = timeout_s
|
|
30
|
+
self.headers = {}
|
|
31
|
+
# If we're calling back into our own task app proxy (e.g., /proxy/groq),
|
|
32
|
+
# the FastAPI app still enforces X-API-Key. Include it when available so
|
|
33
|
+
# intra-app proxy calls authenticate correctly.
|
|
34
|
+
try:
|
|
35
|
+
env_key = os.getenv("ENVIRONMENT_API_KEY")
|
|
36
|
+
if env_key and isinstance(env_key, str):
|
|
37
|
+
self.headers.setdefault("X-API-Key", env_key)
|
|
38
|
+
except Exception:
|
|
39
|
+
pass
|
|
40
|
+
|
|
41
|
+
def _fix_model_parameters(
|
|
42
|
+
self, request: dict[str, Any], target_url: str | None = None
|
|
43
|
+
) -> dict[str, Any]:
|
|
44
|
+
"""
|
|
45
|
+
Fix parameter compatibility for newer OpenAI models.
|
|
46
|
+
|
|
47
|
+
Newer models like gpt-5-nano use 'max_completion_tokens' instead of 'max_tokens'.
|
|
48
|
+
"""
|
|
49
|
+
if not request:
|
|
50
|
+
return request
|
|
51
|
+
|
|
52
|
+
# Make a copy to avoid modifying the original
|
|
53
|
+
fixed_request = request.copy()
|
|
54
|
+
|
|
55
|
+
# Determine if target is OpenAI-compatible (OpenAI, Azure OpenAI).
|
|
56
|
+
# Groq shares the API surface but we keep tool enforcement fields intact.
|
|
57
|
+
is_openai = False
|
|
58
|
+
is_groq = False
|
|
59
|
+
try:
|
|
60
|
+
if isinstance(target_url, str):
|
|
61
|
+
low = target_url.lower()
|
|
62
|
+
if "groq.com" in low or "/proxy/groq" in low:
|
|
63
|
+
is_groq = True
|
|
64
|
+
elif ("openai.com" in low) or ("azure" in low and ".openai." in low) or (
|
|
65
|
+
"/proxy/openai" in low
|
|
66
|
+
):
|
|
67
|
+
is_openai = True
|
|
68
|
+
except Exception:
|
|
69
|
+
is_openai = False
|
|
70
|
+
|
|
71
|
+
model = fixed_request.get("model", "")
|
|
72
|
+
|
|
73
|
+
if is_openai:
|
|
74
|
+
# Remove fields OpenAI/Groq don't accept
|
|
75
|
+
for k in (
|
|
76
|
+
"stop_after_tool_calls",
|
|
77
|
+
"thinking_mode",
|
|
78
|
+
"thinking_budget",
|
|
79
|
+
"reasoning",
|
|
80
|
+
"extra_body",
|
|
81
|
+
"parallel_tool_calls",
|
|
82
|
+
"function_call",
|
|
83
|
+
):
|
|
84
|
+
if k in fixed_request:
|
|
85
|
+
fixed_request.pop(k, None)
|
|
86
|
+
|
|
87
|
+
# GPT-5 family specifics
|
|
88
|
+
if "gpt-5" in model or "gpt-4.1" in model:
|
|
89
|
+
# Convert max_tokens to max_completion_tokens for newer models
|
|
90
|
+
if "max_tokens" in fixed_request:
|
|
91
|
+
if "max_completion_tokens" not in fixed_request:
|
|
92
|
+
fixed_request["max_completion_tokens"] = fixed_request.pop("max_tokens")
|
|
93
|
+
logger.info(
|
|
94
|
+
f"Converted max_tokens to max_completion_tokens for model {model}"
|
|
95
|
+
)
|
|
96
|
+
else:
|
|
97
|
+
fixed_request.pop("max_tokens")
|
|
98
|
+
logger.info(f"Removed conflicting max_tokens parameter for model {model}")
|
|
99
|
+
# Some OpenAI endpoints ignore/deny sampling fields for reasoning models
|
|
100
|
+
for k in ("temperature", "top_p"):
|
|
101
|
+
if k in fixed_request:
|
|
102
|
+
fixed_request.pop(k, None)
|
|
103
|
+
# If tools are present, force single tool choice to our function
|
|
104
|
+
try:
|
|
105
|
+
tools = fixed_request.get("tools")
|
|
106
|
+
if isinstance(tools, list) and tools:
|
|
107
|
+
# Choose the first provided function name from tools schema (e.g., run_command)
|
|
108
|
+
func_name = None
|
|
109
|
+
for t in tools:
|
|
110
|
+
try:
|
|
111
|
+
cand = None
|
|
112
|
+
if isinstance(t, dict):
|
|
113
|
+
f = t.get("function")
|
|
114
|
+
if isinstance(f, dict):
|
|
115
|
+
cand = f.get("name")
|
|
116
|
+
if isinstance(cand, str) and cand:
|
|
117
|
+
func_name = cand
|
|
118
|
+
break
|
|
119
|
+
except Exception:
|
|
120
|
+
continue
|
|
121
|
+
if not func_name:
|
|
122
|
+
func_name = "run_command"
|
|
123
|
+
fixed_request["tool_choice"] = {
|
|
124
|
+
"type": "function",
|
|
125
|
+
"function": {"name": func_name},
|
|
126
|
+
}
|
|
127
|
+
fixed_request["parallel_tool_calls"] = False
|
|
128
|
+
except Exception:
|
|
129
|
+
pass
|
|
130
|
+
|
|
131
|
+
return fixed_request
|
|
132
|
+
|
|
133
|
+
async def generate(
|
|
134
|
+
self,
|
|
135
|
+
request: dict[str, Any],
|
|
136
|
+
base_url: str | None = None,
|
|
137
|
+
timeout_s: float | None = None,
|
|
138
|
+
extra_headers: dict[str, str] | None = None,
|
|
139
|
+
) -> dict[str, Any]:
|
|
140
|
+
"""
|
|
141
|
+
Send a chat completion request to the inference server.
|
|
142
|
+
|
|
143
|
+
Args:
|
|
144
|
+
request: OpenAI-compatible chat completion request
|
|
145
|
+
base_url: Override base URL for this request
|
|
146
|
+
timeout_s: Override timeout for this request
|
|
147
|
+
extra_headers: Additional headers to include (e.g., X-Policy-Name)
|
|
148
|
+
|
|
149
|
+
Returns:
|
|
150
|
+
OpenAI-compatible chat completion response
|
|
151
|
+
"""
|
|
152
|
+
base = (base_url or self.base_url).rstrip("/")
|
|
153
|
+
# Ensure processed_request is defined for error logging paths
|
|
154
|
+
processed_request: dict[str, Any] = dict(request or {})
|
|
155
|
+
|
|
156
|
+
# Bulletproof normalization BEFORE any parsing
|
|
157
|
+
def _local_force_normalize(u: str) -> str:
|
|
158
|
+
if not isinstance(u, str) or not u:
|
|
159
|
+
return u
|
|
160
|
+
p = urlparse(u)
|
|
161
|
+
path = (p.path or "").rstrip("/")
|
|
162
|
+
q = p.query or ""
|
|
163
|
+
# If query contains a path segment, extract and repair
|
|
164
|
+
if q and "/" in q:
|
|
165
|
+
before, after = q.split("/", 1)
|
|
166
|
+
# Split off any extra query parameters that were appended after the path
|
|
167
|
+
cut_positions = [i for i in [after.find("&"), after.find("?")] if i >= 0]
|
|
168
|
+
cut = min(cut_positions) if cut_positions else len(after)
|
|
169
|
+
path_from_query = "/" + after[:cut]
|
|
170
|
+
extra_query = after[cut + 1 :] if cut < len(after) else ""
|
|
171
|
+
merged_query = before
|
|
172
|
+
if extra_query:
|
|
173
|
+
merged_query = f"{merged_query}&{extra_query}" if merged_query else extra_query
|
|
174
|
+
# Ensure final path
|
|
175
|
+
final_path = path_from_query if path_from_query.startswith("/v1/chat/completions") else f"{path_from_query.rstrip('/')}/v1/chat/completions"
|
|
176
|
+
p = p._replace(path=final_path, query=merged_query)
|
|
177
|
+
u = urlunparse(p)
|
|
178
|
+
p = urlparse(u)
|
|
179
|
+
path = p.path or ""
|
|
180
|
+
q = p.query or ""
|
|
181
|
+
if not path.endswith("/v1/chat/completions"):
|
|
182
|
+
new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
|
|
183
|
+
p = p._replace(path=new_path)
|
|
184
|
+
u = urlunparse(p)
|
|
185
|
+
p = urlparse(u)
|
|
186
|
+
q = p.query or ""
|
|
187
|
+
if q and "/" in q:
|
|
188
|
+
# Last-resort: drop anything after first '/'
|
|
189
|
+
safe_q = q.split("/")[0]
|
|
190
|
+
p = p._replace(query=safe_q)
|
|
191
|
+
u = urlunparse(p)
|
|
192
|
+
return u
|
|
193
|
+
|
|
194
|
+
norm_base = None
|
|
195
|
+
try:
|
|
196
|
+
# Try importing shared normalizer first
|
|
197
|
+
from examples.task_apps.crafter.task_app.synth_envs_hosted.utils import (
|
|
198
|
+
force_normalize_chat_completions_url,
|
|
199
|
+
)
|
|
200
|
+
norm_base = force_normalize_chat_completions_url(base)
|
|
201
|
+
except Exception:
|
|
202
|
+
norm_base = _local_force_normalize(base)
|
|
203
|
+
base = norm_base or base
|
|
204
|
+
# Parse URL to handle query parameters correctly
|
|
205
|
+
parsed = urlparse(base)
|
|
206
|
+
path = parsed.path.rstrip("/")
|
|
207
|
+
query = parsed.query
|
|
208
|
+
|
|
209
|
+
# Debug: Log URL parsing
|
|
210
|
+
logger.error(f"[URL_PARSE] base={base} parsed.path={parsed.path} parsed.query={parsed.query}")
|
|
211
|
+
|
|
212
|
+
# CRITICAL FIX: Handle malformed URLs where path is incorrectly in the query string
|
|
213
|
+
# Example: https://host?cid=trace_123/v1/chat/completions
|
|
214
|
+
# Should be: https://host/v1/chat/completions?cid=trace_123
|
|
215
|
+
|
|
216
|
+
# ALWAYS check for malformed URLs - this is CRITICAL
|
|
217
|
+
# CRASH IMMEDIATELY if URL is malformed - don't let it through!
|
|
218
|
+
if query and "/" in query:
|
|
219
|
+
logger.error(f"[URL_FATAL] MALFORMED URL DETECTED AT START: base={base} query={query}")
|
|
220
|
+
# Try to fix it
|
|
221
|
+
logger.error(f"[URL_FIX_TRIGGERED] Query contains '/': query={query}")
|
|
222
|
+
# This is a malformed URL - extract path from query and fix it
|
|
223
|
+
logger.error(
|
|
224
|
+
f"[URL_FIX] Malformed URL detected: {base}\n"
|
|
225
|
+
f"Query contains path segments. Fixing..."
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
# Find where the path starts in the query string
|
|
229
|
+
# The query format is: "cid=value/path" or similar
|
|
230
|
+
# We need to find the first "/" that starts a path segment
|
|
231
|
+
query_parts = query.split("/", 1)
|
|
232
|
+
if len(query_parts) == 2:
|
|
233
|
+
# query_parts[0] is the actual query (e.g., "cid=trace_123")
|
|
234
|
+
# query_parts[1] is the path that was incorrectly put in query
|
|
235
|
+
actual_query = query_parts[0]
|
|
236
|
+
path_and_more = query_parts[1] # Could be "v1/chat/completions" or "v1/chat/completions&foo=bar"
|
|
237
|
+
|
|
238
|
+
# Extract the path part (everything before "&" or "?" if present)
|
|
239
|
+
# Handle both "&" (query param separator) and "?" (another malformed query separator)
|
|
240
|
+
if "&" in path_and_more:
|
|
241
|
+
# Path is followed by more query params (separated by &)
|
|
242
|
+
path_segment, extra_query = path_and_more.split("&", 1)
|
|
243
|
+
path_in_query = "/" + path_segment # Restore leading slash
|
|
244
|
+
# Merge extra query params with actual_query
|
|
245
|
+
actual_query = f"{actual_query}&{extra_query}"
|
|
246
|
+
elif "?" in path_and_more:
|
|
247
|
+
# Path is followed by more query params (separated by ?, which is malformed)
|
|
248
|
+
path_segment, extra_query = path_and_more.split("?", 1)
|
|
249
|
+
path_in_query = "/" + path_segment # Restore leading slash
|
|
250
|
+
# Merge extra query params with actual_query (use & as separator)
|
|
251
|
+
actual_query = f"{actual_query}&{extra_query}"
|
|
252
|
+
else:
|
|
253
|
+
# No extra query params, just the path
|
|
254
|
+
path_in_query = "/" + path_and_more # Restore leading slash
|
|
255
|
+
|
|
256
|
+
# If the path_in_query already contains /v1/chat/completions, use it
|
|
257
|
+
# Otherwise, append /v1/chat/completions
|
|
258
|
+
if path_in_query.startswith("/v1/chat/completions"):
|
|
259
|
+
final_path = path_in_query
|
|
260
|
+
else:
|
|
261
|
+
# Append /v1/chat/completions to whatever path we found
|
|
262
|
+
final_path = path_in_query.rstrip("/") + "/v1/chat/completions"
|
|
263
|
+
|
|
264
|
+
# Reconstruct URL correctly: path comes before query
|
|
265
|
+
parsed = parsed._replace(path=final_path, query=actual_query)
|
|
266
|
+
url = urlunparse(parsed)
|
|
267
|
+
logger.warning(f"[URL_FIX] Fixed malformed URL:\n FROM: {base}\n TO: {url}")
|
|
268
|
+
else:
|
|
269
|
+
# Can't parse, fall through to normal processing
|
|
270
|
+
logger.error(f"[URL_FIX] Could not parse malformed query: {query}")
|
|
271
|
+
path = parsed.path.rstrip("/")
|
|
272
|
+
if not path.endswith("/v1/chat/completions"):
|
|
273
|
+
new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
|
|
274
|
+
parsed = parsed._replace(path=new_path)
|
|
275
|
+
url = urlunparse(parsed)
|
|
276
|
+
else:
|
|
277
|
+
url = base
|
|
278
|
+
# Normal case: query params are separate from path
|
|
279
|
+
elif path.endswith("/v1/chat/completions"):
|
|
280
|
+
url = base
|
|
281
|
+
else:
|
|
282
|
+
# Append /v1/chat/completions to the path, preserving query params
|
|
283
|
+
new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
|
|
284
|
+
parsed = parsed._replace(path=new_path)
|
|
285
|
+
url = urlunparse(parsed)
|
|
286
|
+
logger.debug(f"[URL_CONSTRUCT] Added path to URL: {base} -> {url}")
|
|
287
|
+
|
|
288
|
+
# FINAL VALIDATION: Ensure the constructed URL is correct
|
|
289
|
+
final_parsed = urlparse(url)
|
|
290
|
+
final_path = final_parsed.path or ""
|
|
291
|
+
final_query = final_parsed.query or ""
|
|
292
|
+
|
|
293
|
+
# Verify path is correct
|
|
294
|
+
if not final_path.endswith("/v1/chat/completions"):
|
|
295
|
+
error_msg = (
|
|
296
|
+
f"FATAL [OpenAIClient]: URL missing /v1/chat/completions path!\n"
|
|
297
|
+
f"Original: {base}\n"
|
|
298
|
+
f"Constructed: {url}\n"
|
|
299
|
+
f"Path: {final_path}\n"
|
|
300
|
+
)
|
|
301
|
+
logger.error(error_msg)
|
|
302
|
+
raise ValueError(error_msg)
|
|
303
|
+
|
|
304
|
+
# Verify query doesn't contain path segments
|
|
305
|
+
if final_query and "/" in final_query:
|
|
306
|
+
error_msg = (
|
|
307
|
+
f"FATAL [OpenAIClient]: Query still contains path segments after fix!\n"
|
|
308
|
+
f"Original: {base}\n"
|
|
309
|
+
f"Constructed: {url}\n"
|
|
310
|
+
f"Query: {final_query}\n"
|
|
311
|
+
f"This indicates a bug in URL construction logic."
|
|
312
|
+
)
|
|
313
|
+
logger.error(error_msg)
|
|
314
|
+
raise ValueError(error_msg)
|
|
315
|
+
|
|
316
|
+
timeout = timeout_s or self.timeout_s
|
|
317
|
+
|
|
318
|
+
# Merge headers
|
|
319
|
+
headers = self.headers.copy()
|
|
320
|
+
if extra_headers:
|
|
321
|
+
headers.update(extra_headers)
|
|
322
|
+
# Always include X-API-Key for intra-app requests
|
|
323
|
+
try:
|
|
324
|
+
envk = os.getenv("ENVIRONMENT_API_KEY")
|
|
325
|
+
if envk and isinstance(envk, str):
|
|
326
|
+
headers["X-API-Key"] = envk
|
|
327
|
+
except Exception:
|
|
328
|
+
pass
|
|
329
|
+
|
|
330
|
+
# Set Authorization header based on the target URL
|
|
331
|
+
try:
|
|
332
|
+
low_url = (url or "").lower()
|
|
333
|
+
|
|
334
|
+
# If calling OpenAI directly (api.openai.com)
|
|
335
|
+
if "api.openai.com" in low_url:
|
|
336
|
+
openai_key = os.getenv("OPENAI_API_KEY")
|
|
337
|
+
if openai_key and isinstance(openai_key, str):
|
|
338
|
+
headers["Authorization"] = f"Bearer {openai_key}"
|
|
339
|
+
|
|
340
|
+
# If target is Synth backend (any deployment), use SYNTH_API_KEY
|
|
341
|
+
# Matches: synth-backend-*, agent-learning*, localhost:8000, 127.0.0.1:8000
|
|
342
|
+
elif any(pattern in low_url for pattern in [
|
|
343
|
+
"synth-backend", "synth.run", "agent-learning",
|
|
344
|
+
"localhost:8000", "127.0.0.1:8000"
|
|
345
|
+
]):
|
|
346
|
+
synth_key = os.getenv("SYNTH_API_KEY")
|
|
347
|
+
if synth_key and isinstance(synth_key, str):
|
|
348
|
+
headers["Authorization"] = f"Bearer {synth_key}"
|
|
349
|
+
|
|
350
|
+
# If target is Groq, use GROQ_API_KEY
|
|
351
|
+
elif "/proxy/groq" in low_url or "api.groq.com" in low_url:
|
|
352
|
+
gk = os.getenv("GROQ_API_KEY")
|
|
353
|
+
if gk and isinstance(gk, str):
|
|
354
|
+
headers["Authorization"] = f"Bearer {gk}"
|
|
355
|
+
except Exception:
|
|
356
|
+
pass
|
|
357
|
+
|
|
358
|
+
# In-process proxy path: avoid HTTP round-trip and auth dependency
|
|
359
|
+
try:
|
|
360
|
+
if base.endswith("/proxy/groq") or base.endswith("/proxy/groq/"):
|
|
361
|
+
from synth_ai.task.server import prepare_for_groq, inject_system_hint
|
|
362
|
+
# Prepare payload similar to server-side proxy
|
|
363
|
+
model = request.get("model") if isinstance(request.get("model"), str) else None
|
|
364
|
+
payload = prepare_for_groq(model, request)
|
|
365
|
+
payload = inject_system_hint(payload, "")
|
|
366
|
+
# Call vendor directly
|
|
367
|
+
gk = os.getenv("GROQ_API_KEY") or ""
|
|
368
|
+
async with httpx.AsyncClient(timeout=timeout) as client:
|
|
369
|
+
resp = await client.post(
|
|
370
|
+
"https://api.groq.com/openai/v1/chat/completions",
|
|
371
|
+
json=payload,
|
|
372
|
+
headers={"Authorization": f"Bearer {gk}"},
|
|
373
|
+
)
|
|
374
|
+
resp.raise_for_status()
|
|
375
|
+
return resp.json()
|
|
376
|
+
except Exception as _local_proxy_err:
|
|
377
|
+
# Do NOT fall back silently; surface the error so callers fail fast
|
|
378
|
+
raise
|
|
379
|
+
|
|
380
|
+
# DEBUG: Log request BEFORE _fix_model_parameters
|
|
381
|
+
logger.debug(f"🔊 [OPENAI_CLIENT_PRE_FIX] Request message[1] content type: {type(request.get('messages', [])[1].get('content') if len(request.get('messages', [])) > 1 else None)}")
|
|
382
|
+
if len(request.get("messages", [])) > 1:
|
|
383
|
+
msg1_content = request["messages"][1].get("content")
|
|
384
|
+
logger.debug(f"🔊 [OPENAI_CLIENT_PRE_FIX] Message[1] content value: {msg1_content if not isinstance(msg1_content, list) else f'list[{len(msg1_content)}]'}")
|
|
385
|
+
|
|
386
|
+
# Fix parameter compatibility for newer models
|
|
387
|
+
processed_request = self._fix_model_parameters(request, target_url=url)
|
|
388
|
+
|
|
389
|
+
# DEBUG: Log request AFTER _fix_model_parameters
|
|
390
|
+
logger.debug(f"🔊 [OPENAI_CLIENT_POST_FIX] Processed message[1] content type: {type(processed_request.get('messages', [])[1].get('content') if len(processed_request.get('messages', [])) > 1 else None)}")
|
|
391
|
+
if len(processed_request.get("messages", [])) > 1:
|
|
392
|
+
msg1_content_post = processed_request["messages"][1].get("content")
|
|
393
|
+
logger.debug(f"🔊 [OPENAI_CLIENT_POST_FIX] Message[1] content value: {msg1_content_post if not isinstance(msg1_content_post, list) else f'list[{len(msg1_content_post)}]'}")
|
|
394
|
+
|
|
395
|
+
# Log request (redact messages in production)
|
|
396
|
+
# CRITICAL: Verify URL is correct BEFORE making HTTP request
|
|
397
|
+
final_parsed_check = urlparse(url)
|
|
398
|
+
logger.error(f"[URL_FINAL_CHECK] Before HTTP request: url={url} path={final_parsed_check.path} query={final_parsed_check.query}")
|
|
399
|
+
|
|
400
|
+
# CRASH IF URL IS STILL MALFORMED - DO NOT PROCEED
|
|
401
|
+
if final_parsed_check.query and "/" in final_parsed_check.query:
|
|
402
|
+
error_msg = (
|
|
403
|
+
f"FATAL [OpenAIClient]: URL IS STILL MALFORMED AFTER FIX ATTEMPT!\n"
|
|
404
|
+
f"Original base_url: {base_url or self.base_url}\n"
|
|
405
|
+
f"Constructed URL: {url}\n"
|
|
406
|
+
f"Path: {final_parsed_check.path}\n"
|
|
407
|
+
f"Query (contains path): {final_parsed_check.query}\n"
|
|
408
|
+
f"This will cause a 404 error. CRASHING NOW to prevent bad request."
|
|
409
|
+
)
|
|
410
|
+
logger.error(error_msg)
|
|
411
|
+
raise ValueError(error_msg)
|
|
412
|
+
|
|
413
|
+
# Verify path is correct
|
|
414
|
+
if not final_parsed_check.path.endswith("/v1/chat/completions"):
|
|
415
|
+
error_msg = (
|
|
416
|
+
f"FATAL [OpenAIClient]: URL missing /v1/chat/completions path!\n"
|
|
417
|
+
f"URL: {url}\n"
|
|
418
|
+
f"Path: {final_parsed_check.path}\n"
|
|
419
|
+
)
|
|
420
|
+
logger.error(error_msg)
|
|
421
|
+
raise ValueError(error_msg)
|
|
422
|
+
|
|
423
|
+
# Log request with detailed prompts/tools preview and sampling settings (Authorization is not logged)
|
|
424
|
+
logger.info(f"Inference POST target: {url}")
|
|
425
|
+
if extra_headers:
|
|
426
|
+
logger.info(f"Extra headers: {extra_headers}")
|
|
427
|
+
with contextlib.suppress(Exception):
|
|
428
|
+
keys_preview = sorted(processed_request.keys())
|
|
429
|
+
logger.info(f"Request keys: {keys_preview}")
|
|
430
|
+
|
|
431
|
+
# Detailed IO log: messages/tools/sampling and final payload fields
|
|
432
|
+
try:
|
|
433
|
+
import json as _json
|
|
434
|
+
|
|
435
|
+
def _truncate(text: str, limit: int = 2000) -> str:
|
|
436
|
+
return text if len(text) <= limit else text[:limit] + "…"
|
|
437
|
+
|
|
438
|
+
def _messages_preview(msgs: Any) -> str:
|
|
439
|
+
try:
|
|
440
|
+
out: list[dict[str, Any]] = []
|
|
441
|
+
if isinstance(msgs, list):
|
|
442
|
+
for m in msgs:
|
|
443
|
+
if not isinstance(m, dict):
|
|
444
|
+
continue
|
|
445
|
+
role = m.get("role")
|
|
446
|
+
content = m.get("content")
|
|
447
|
+
if isinstance(content, str):
|
|
448
|
+
text = content
|
|
449
|
+
elif isinstance(content, list):
|
|
450
|
+
parts: list[str] = []
|
|
451
|
+
for seg in content:
|
|
452
|
+
if isinstance(seg, dict) and isinstance(seg.get("text"), str):
|
|
453
|
+
parts.append(seg["text"])
|
|
454
|
+
text = "\n".join(parts)
|
|
455
|
+
else:
|
|
456
|
+
text = ""
|
|
457
|
+
out.append({"role": role, "content": _truncate(str(text), 4000)})
|
|
458
|
+
return _json.dumps(out)
|
|
459
|
+
except Exception:
|
|
460
|
+
return "[]"
|
|
461
|
+
|
|
462
|
+
def _tools_preview(tools: Any) -> str:
|
|
463
|
+
try:
|
|
464
|
+
return _truncate(_json.dumps(tools), 4000)
|
|
465
|
+
except Exception:
|
|
466
|
+
return "[]"
|
|
467
|
+
|
|
468
|
+
msgs = processed_request.get("messages") if isinstance(processed_request, dict) else None
|
|
469
|
+
tools = processed_request.get("tools") if isinstance(processed_request, dict) else None
|
|
470
|
+
io_log: dict[str, Any] = {
|
|
471
|
+
"llm.call": True,
|
|
472
|
+
"model": processed_request.get("model") if isinstance(processed_request, dict) else None,
|
|
473
|
+
"tool_choice": processed_request.get("tool_choice") if isinstance(processed_request, dict) else None,
|
|
474
|
+
"parallel_tool_calls": processed_request.get("parallel_tool_calls") if isinstance(processed_request, dict) else None,
|
|
475
|
+
"stop_after_tool_calls": processed_request.get("stop_after_tool_calls") if isinstance(processed_request, dict) else None,
|
|
476
|
+
"temperature": processed_request.get("temperature") if isinstance(processed_request, dict) else None,
|
|
477
|
+
"top_p": processed_request.get("top_p") if isinstance(processed_request, dict) else None,
|
|
478
|
+
"max_tokens": processed_request.get("max_tokens") if isinstance(processed_request, dict) else None,
|
|
479
|
+
"max_completion_tokens": processed_request.get("max_completion_tokens") if isinstance(processed_request, dict) else None,
|
|
480
|
+
"messages_preview": _messages_preview(msgs),
|
|
481
|
+
"tools_preview": _tools_preview(tools),
|
|
482
|
+
}
|
|
483
|
+
logger.info(io_log)
|
|
484
|
+
except Exception:
|
|
485
|
+
pass
|
|
486
|
+
|
|
487
|
+
# Final hard-guard for OpenAI/Groq: drop unsupported field
|
|
488
|
+
try:
|
|
489
|
+
low_url = url.lower()
|
|
490
|
+
if ("openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url) and "stop_after_tool_calls" in processed_request:
|
|
491
|
+
processed_request.pop("stop_after_tool_calls", None)
|
|
492
|
+
logger.info("Removed stop_after_tool_calls for %s request", "Groq/OpenAI")
|
|
493
|
+
# Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
|
|
494
|
+
if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
|
|
495
|
+
processed_request, dict
|
|
496
|
+
):
|
|
497
|
+
rf = processed_request.get("response_format")
|
|
498
|
+
rf_type = None
|
|
499
|
+
if isinstance(rf, dict):
|
|
500
|
+
rf_type = str(rf.get("type") or "").lower()
|
|
501
|
+
if rf_type in {"json_object", "json_schema"}:
|
|
502
|
+
msgs = processed_request.get("messages")
|
|
503
|
+
has_json_word = False
|
|
504
|
+
if isinstance(msgs, list):
|
|
505
|
+
for m in msgs:
|
|
506
|
+
try:
|
|
507
|
+
content = m.get("content") if isinstance(m, dict) else None
|
|
508
|
+
text = None
|
|
509
|
+
if isinstance(content, str):
|
|
510
|
+
text = content
|
|
511
|
+
elif isinstance(content, list):
|
|
512
|
+
# Join any text segments
|
|
513
|
+
parts = []
|
|
514
|
+
for seg in content:
|
|
515
|
+
if isinstance(seg, dict) and isinstance(
|
|
516
|
+
seg.get("text"), str
|
|
517
|
+
):
|
|
518
|
+
parts.append(seg["text"])
|
|
519
|
+
text = "\n".join(parts)
|
|
520
|
+
if isinstance(text, str) and ("json" in text.lower()):
|
|
521
|
+
has_json_word = True
|
|
522
|
+
break
|
|
523
|
+
except Exception:
|
|
524
|
+
continue
|
|
525
|
+
if not has_json_word:
|
|
526
|
+
try:
|
|
527
|
+
instruction = (
|
|
528
|
+
"Respond in strict JSON only. Output a single valid JSON object."
|
|
529
|
+
)
|
|
530
|
+
if not isinstance(msgs, list):
|
|
531
|
+
msgs = []
|
|
532
|
+
# Prepend a system message to satisfy Groq requirement without changing user intent
|
|
533
|
+
prepend = {"role": "system", "content": instruction}
|
|
534
|
+
processed_request["messages"] = [prepend] + list(msgs)
|
|
535
|
+
logger.info(
|
|
536
|
+
"Injected JSON-mode system instruction for Groq response_format compliance"
|
|
537
|
+
)
|
|
538
|
+
except Exception:
|
|
539
|
+
pass
|
|
540
|
+
except Exception:
|
|
541
|
+
pass
|
|
542
|
+
|
|
543
|
+
async with httpx.AsyncClient(timeout=timeout) as client:
|
|
544
|
+
try:
|
|
545
|
+
response = await client.post(
|
|
546
|
+
url,
|
|
547
|
+
json=processed_request,
|
|
548
|
+
headers=headers,
|
|
549
|
+
)
|
|
550
|
+
response.raise_for_status()
|
|
551
|
+
|
|
552
|
+
# Rich response diagnostics
|
|
553
|
+
content_type = response.headers.get("content-type")
|
|
554
|
+
body_text = response.text
|
|
555
|
+
logger.info(
|
|
556
|
+
f"Inference response status=200, content-type={content_type}, bytes={len(body_text)}"
|
|
557
|
+
)
|
|
558
|
+
if body_text:
|
|
559
|
+
# Log raw output with generous preview to debug no-tool-call issues
|
|
560
|
+
preview_len = min(4000, len(body_text))
|
|
561
|
+
logger.info({
|
|
562
|
+
"llm.raw_response": True,
|
|
563
|
+
"bytes": len(body_text),
|
|
564
|
+
"preview": body_text[:preview_len],
|
|
565
|
+
})
|
|
566
|
+
|
|
567
|
+
result = response.json()
|
|
568
|
+
logger.info(f"Inference response parsed_type={type(result).__name__}")
|
|
569
|
+
|
|
570
|
+
tool_call_count = -1
|
|
571
|
+
# Normalize tool calls so downstream always sees a function tool call
|
|
572
|
+
try:
|
|
573
|
+
if isinstance(result, dict):
|
|
574
|
+
choices = result.get("choices")
|
|
575
|
+
if isinstance(choices, list) and choices:
|
|
576
|
+
msg = choices[0].get("message")
|
|
577
|
+
if isinstance(msg, dict):
|
|
578
|
+
# Prefer tool_calls; if missing but function_call is present, synthesize tool_calls
|
|
579
|
+
tc = msg.get("tool_calls")
|
|
580
|
+
fc = msg.get("function_call")
|
|
581
|
+
if (not isinstance(tc, list) or not tc) and isinstance(fc, dict):
|
|
582
|
+
name = fc.get("name") or "interact_many"
|
|
583
|
+
args = fc.get("arguments") or "{}"
|
|
584
|
+
msg["tool_calls"] = [
|
|
585
|
+
{
|
|
586
|
+
"id": "call_norm",
|
|
587
|
+
"type": "function",
|
|
588
|
+
"function": {"name": name, "arguments": args},
|
|
589
|
+
}
|
|
590
|
+
]
|
|
591
|
+
if isinstance(choices[0], dict):
|
|
592
|
+
choices[0]["finish_reason"] = "tool_calls"
|
|
593
|
+
# Log tool call count for debugging
|
|
594
|
+
try:
|
|
595
|
+
tc2 = msg.get("tool_calls")
|
|
596
|
+
count = len(tc2) if isinstance(tc2, list) else 0
|
|
597
|
+
logger.info({
|
|
598
|
+
"llm.tool_calls": True,
|
|
599
|
+
"count": count,
|
|
600
|
+
"finish_reason": choices[0].get("finish_reason") if isinstance(choices[0], dict) else None,
|
|
601
|
+
})
|
|
602
|
+
if count == 0:
|
|
603
|
+
click.echo(
|
|
604
|
+
"[openai-client] ✗ upstream response missing tool_calls; dumping preview to logs",
|
|
605
|
+
err=True,
|
|
606
|
+
)
|
|
607
|
+
logger.error(
|
|
608
|
+
"Inference response missing tool_calls; failing fast. Raw body preview: %s",
|
|
609
|
+
body_text[:500] if body_text else "<empty>",
|
|
610
|
+
)
|
|
611
|
+
raise ValueError("Inference response missing tool_calls")
|
|
612
|
+
tool_call_count = count
|
|
613
|
+
except Exception:
|
|
614
|
+
pass
|
|
615
|
+
except Exception:
|
|
616
|
+
pass
|
|
617
|
+
|
|
618
|
+
click.echo(
|
|
619
|
+
f"[openai-client] ✓ response ok with tool_calls={tool_call_count}",
|
|
620
|
+
err=True,
|
|
621
|
+
)
|
|
622
|
+
return result
|
|
623
|
+
|
|
624
|
+
except httpx.TimeoutException:
|
|
625
|
+
logger.error(f"Request to {url} timed out after {timeout}s")
|
|
626
|
+
raise
|
|
627
|
+
except httpx.HTTPStatusError as e:
|
|
628
|
+
status = e.response.status_code if e.response is not None else None
|
|
629
|
+
text = e.response.text if e.response is not None else str(e)
|
|
630
|
+
# Log full body and request diagnostics for debugging remote failures
|
|
631
|
+
try:
|
|
632
|
+
redacted_headers = dict(headers)
|
|
633
|
+
if "Authorization" in redacted_headers:
|
|
634
|
+
redacted_headers["Authorization"] = "***REDACTED***"
|
|
635
|
+
logger.error(
|
|
636
|
+
{
|
|
637
|
+
"openai_http_error": True,
|
|
638
|
+
"status": status,
|
|
639
|
+
"url": url,
|
|
640
|
+
"body": text,
|
|
641
|
+
}
|
|
642
|
+
)
|
|
643
|
+
logger.error(
|
|
644
|
+
{
|
|
645
|
+
"request_debug": True,
|
|
646
|
+
"status": status,
|
|
647
|
+
"target": url,
|
|
648
|
+
"headers": redacted_headers,
|
|
649
|
+
"payload": processed_request,
|
|
650
|
+
}
|
|
651
|
+
)
|
|
652
|
+
except Exception:
|
|
653
|
+
logger.error(f"HTTP error from {url}: {status} - {text}")
|
|
654
|
+
# Special case: token budget exceeded handled below, else 422 degrade, else re-raise
|
|
655
|
+
try:
|
|
656
|
+
if status == 400 and e.response is not None:
|
|
657
|
+
data = e.response.json()
|
|
658
|
+
detail = data.get("detail") if isinstance(data, dict) else None
|
|
659
|
+
err_code = (detail or {}).get("error") if isinstance(detail, dict) else None
|
|
660
|
+
if err_code == "token_budget_exceeded":
|
|
661
|
+
info = (detail or {}).get("details") or {}
|
|
662
|
+
messages_tokens = int(info.get("messages_tokens") or 0)
|
|
663
|
+
model_limit = int(info.get("model_limit") or 0)
|
|
664
|
+
safety = 64
|
|
665
|
+
# Compute a conservative new max_tokens
|
|
666
|
+
new_max = max(16, model_limit - messages_tokens - safety)
|
|
667
|
+
try:
|
|
668
|
+
# Update request and retry once immediately with smaller budget
|
|
669
|
+
if isinstance(processed_request, dict):
|
|
670
|
+
processed_request = dict(processed_request)
|
|
671
|
+
if "max_completion_tokens" in processed_request:
|
|
672
|
+
processed_request["max_completion_tokens"] = new_max
|
|
673
|
+
processed_request.pop("max_tokens", None)
|
|
674
|
+
else:
|
|
675
|
+
processed_request["max_tokens"] = new_max
|
|
676
|
+
# Remove optional fields that some servers reject
|
|
677
|
+
for k in ("thinking_mode", "thinking_budget", "reasoning"):
|
|
678
|
+
processed_request.pop(k, None)
|
|
679
|
+
# Force structured tool choice
|
|
680
|
+
if processed_request.get("tool_choice") == "required":
|
|
681
|
+
func_name = "run_command"
|
|
682
|
+
try:
|
|
683
|
+
tools_arr = processed_request.get("tools") or []
|
|
684
|
+
if isinstance(tools_arr, list) and tools_arr:
|
|
685
|
+
f = (
|
|
686
|
+
tools_arr[0].get("function")
|
|
687
|
+
if isinstance(tools_arr[0], dict)
|
|
688
|
+
else None
|
|
689
|
+
)
|
|
690
|
+
cand = (
|
|
691
|
+
(f or {}).get("name")
|
|
692
|
+
if isinstance(f, dict)
|
|
693
|
+
else None
|
|
694
|
+
)
|
|
695
|
+
if isinstance(cand, str) and cand:
|
|
696
|
+
func_name = cand
|
|
697
|
+
except Exception:
|
|
698
|
+
pass
|
|
699
|
+
processed_request["tool_choice"] = {
|
|
700
|
+
"type": "function",
|
|
701
|
+
"function": {"name": func_name},
|
|
702
|
+
}
|
|
703
|
+
processed_request["parallel_tool_calls"] = False
|
|
704
|
+
logger.warning(
|
|
705
|
+
{
|
|
706
|
+
"token_budget_recovery": True,
|
|
707
|
+
"messages_tokens": messages_tokens,
|
|
708
|
+
"model_limit": model_limit,
|
|
709
|
+
"retry_max_tokens": new_max,
|
|
710
|
+
}
|
|
711
|
+
)
|
|
712
|
+
# Retry once with reduced budget
|
|
713
|
+
async with httpx.AsyncClient(timeout=timeout) as client2:
|
|
714
|
+
r2 = await client2.post(
|
|
715
|
+
url, json=processed_request, headers=headers
|
|
716
|
+
)
|
|
717
|
+
r2.raise_for_status()
|
|
718
|
+
return r2.json()
|
|
719
|
+
except Exception:
|
|
720
|
+
pass
|
|
721
|
+
except Exception:
|
|
722
|
+
pass
|
|
723
|
+
raise
|
|
724
|
+
except Exception as e:
|
|
725
|
+
logger.error(f"Unexpected error calling {url}: {e}")
|
|
726
|
+
raise
|
|
727
|
+
|
|
728
|
+
async def check_health(
    self,
    base_url: str | None = None,
    timeout_s: float | None = None,
) -> dict[str, Any]:
    """
    Probe the inference service's ``/health`` endpoint.

    Args:
        base_url: Base URL to probe instead of ``self.base_url``
        timeout_s: Request timeout in seconds; 10.0 is used when omitted

    Returns:
        The decoded JSON body on success. On failure a dict with a
        ``status`` field: ``"overloaded"`` (plus ``retry_after``) when a
        400 response reports overload, otherwise ``"unhealthy"`` with the
        stringified error under ``error``.
    """
    health_url = (base_url or self.base_url).rstrip("/") + "/health"
    effective_timeout = timeout_s or 10.0

    try:
        async with httpx.AsyncClient(timeout=effective_timeout) as http:
            reply = await http.get(health_url, headers=self.headers)
            reply.raise_for_status()
            return reply.json()
    except httpx.HTTPStatusError as status_err:
        if status_err.response.status_code == 400:
            # A 400 whose body says "overloaded" means the service is busy
            # but alive; any problem decoding the body falls through to the
            # generic "unhealthy" answer below.
            with contextlib.suppress(Exception):
                body = status_err.response.json()
                if body.get("status") == "overloaded":
                    return {"status": "overloaded", "retry_after": body.get("retry_after", 1)}
        return {"status": "unhealthy", "error": str(status_err)}
    except Exception as err:
        return {"status": "unhealthy", "error": str(err)}
|
|
763
|
+
|
|
764
|
+
async def generate_with_retries(
|
|
765
|
+
self,
|
|
766
|
+
request: dict[str, Any],
|
|
767
|
+
base_url: str | None = None,
|
|
768
|
+
timeout_s: float | None = None,
|
|
769
|
+
max_retries: int = 4,
|
|
770
|
+
backoff_factor: float = 2.0,
|
|
771
|
+
extra_headers: dict[str, str] | None = None,
|
|
772
|
+
) -> dict[str, Any]:
|
|
773
|
+
"""
|
|
774
|
+
Generate with exponential backoff retries for transient errors.
|
|
775
|
+
|
|
776
|
+
Args:
|
|
777
|
+
request: OpenAI-compatible chat completion request
|
|
778
|
+
base_url: Override base URL
|
|
779
|
+
timeout_s: Override timeout
|
|
780
|
+
max_retries: Maximum number of retry attempts
|
|
781
|
+
backoff_factor: Exponential backoff multiplier
|
|
782
|
+
extra_headers: Additional headers to include (e.g., X-Policy-Name)
|
|
783
|
+
|
|
784
|
+
Returns:
|
|
785
|
+
OpenAI-compatible chat completion response
|
|
786
|
+
"""
|
|
787
|
+
last_error = None
|
|
788
|
+
processed_request: dict[str, Any] = dict(request or {})
|
|
789
|
+
wait_time = 1.0
|
|
790
|
+
|
|
791
|
+
for attempt in range(max_retries + 1):
|
|
792
|
+
try:
|
|
793
|
+
# Apply parameter fixes to the request
|
|
794
|
+
# CRITICAL: Use proper URL parsing, not string concatenation!
|
|
795
|
+
target_base = base_url or self.base_url
|
|
796
|
+
if target_base:
|
|
797
|
+
parsed_target = urlparse(target_base)
|
|
798
|
+
target_path = parsed_target.path.rstrip("/")
|
|
799
|
+
if not target_path.endswith("/v1/chat/completions"):
|
|
800
|
+
new_target_path = f"{target_path}/v1/chat/completions" if target_path else "/v1/chat/completions"
|
|
801
|
+
parsed_target = parsed_target._replace(path=new_target_path)
|
|
802
|
+
target_url = urlunparse(parsed_target)
|
|
803
|
+
else:
|
|
804
|
+
target_url = target_base
|
|
805
|
+
else:
|
|
806
|
+
target_url = None
|
|
807
|
+
|
|
808
|
+
processed_request = self._fix_model_parameters(
|
|
809
|
+
request,
|
|
810
|
+
target_url=target_url,
|
|
811
|
+
)
|
|
812
|
+
return await self.generate(
|
|
813
|
+
request=processed_request,
|
|
814
|
+
base_url=base_url,
|
|
815
|
+
timeout_s=timeout_s,
|
|
816
|
+
extra_headers=extra_headers,
|
|
817
|
+
)
|
|
818
|
+
except httpx.HTTPStatusError as e:
|
|
819
|
+
# Retry on 400 (overloaded), 429 (rate limit), 500 (internal error), 503 (service unavailable)
|
|
820
|
+
if e.response.status_code not in [400, 429, 500, 503]:
|
|
821
|
+
raise
|
|
822
|
+
last_error = e
|
|
823
|
+
if e.response.status_code == 400:
|
|
824
|
+
# Check if this is an overload error by looking at response content
|
|
825
|
+
try:
|
|
826
|
+
response_data = e.response.json()
|
|
827
|
+
if response_data.get("status") == "overloaded":
|
|
828
|
+
retry_after = response_data.get("retry_after", 1)
|
|
829
|
+
# Use the suggested retry_after time instead of exponential backoff for overload
|
|
830
|
+
wait_time = max(wait_time, float(retry_after))
|
|
831
|
+
logger.warning(
|
|
832
|
+
f"Inference service overloaded (400). {response_data} Retrying after {wait_time}s..."
|
|
833
|
+
)
|
|
834
|
+
else:
|
|
835
|
+
error_block = response_data.get("error")
|
|
836
|
+
error_code = ""
|
|
837
|
+
if isinstance(error_block, dict):
|
|
838
|
+
error_code = str(
|
|
839
|
+
error_block.get("code") or error_block.get("type") or ""
|
|
840
|
+
).lower()
|
|
841
|
+
if error_code in {"tool_use_failed", "tool_call_failed"}:
|
|
842
|
+
logger.error(
|
|
843
|
+
{
|
|
844
|
+
"tool_use_failed": True,
|
|
845
|
+
"target": (base_url or self.base_url),
|
|
846
|
+
"message": error_block.get("message") if isinstance(error_block, dict) else None,
|
|
847
|
+
}
|
|
848
|
+
)
|
|
849
|
+
raise RuntimeError(
|
|
850
|
+
f"Inference 400 response (tool call failed): {error_block.get('message') if isinstance(error_block, dict) else 'Tool call failed'}"
|
|
851
|
+
) from e
|
|
852
|
+
# This is a different type of 400 error, don't retry
|
|
853
|
+
try:
|
|
854
|
+
redacted_headers = {}
|
|
855
|
+
try:
|
|
856
|
+
redacted_headers = dict(self.headers)
|
|
857
|
+
if "Authorization" in redacted_headers:
|
|
858
|
+
redacted_headers["Authorization"] = "***REDACTED***"
|
|
859
|
+
except Exception:
|
|
860
|
+
redacted_headers = {}
|
|
861
|
+
logger.error(
|
|
862
|
+
{
|
|
863
|
+
"non_overload_400": True,
|
|
864
|
+
"target": (base_url or self.base_url),
|
|
865
|
+
"payload": processed_request,
|
|
866
|
+
"headers": redacted_headers,
|
|
867
|
+
"body": e.response.text if e.response is not None else None,
|
|
868
|
+
}
|
|
869
|
+
)
|
|
870
|
+
except Exception:
|
|
871
|
+
pass
|
|
872
|
+
raise RuntimeError(
|
|
873
|
+
f"Inference 400 response: {e.response.text if e.response is not None else 'Bad Request'}"
|
|
874
|
+
) from e
|
|
875
|
+
except Exception:
|
|
876
|
+
# If we can't parse the response, don't retry 400 errors
|
|
877
|
+
with contextlib.suppress(Exception):
|
|
878
|
+
logger.error(
|
|
879
|
+
{
|
|
880
|
+
"non_overload_400_unparsed": True,
|
|
881
|
+
"target": (base_url or self.base_url),
|
|
882
|
+
"payload": processed_request,
|
|
883
|
+
}
|
|
884
|
+
)
|
|
885
|
+
raise RuntimeError(
|
|
886
|
+
f"Inference 400 response (unparsed): {e.response.text if e.response is not None else 'Bad Request'}"
|
|
887
|
+
) from e
|
|
888
|
+
elif e.response.status_code == 503:
|
|
889
|
+
# Avoid referencing undefined response_data
|
|
890
|
+
try:
|
|
891
|
+
preview = (e.response.text or "")[:200]
|
|
892
|
+
except Exception:
|
|
893
|
+
preview = ""
|
|
894
|
+
logger.warning(
|
|
895
|
+
f"Flash returned 503; container may be cold starting. Retrying... body={preview}"
|
|
896
|
+
)
|
|
897
|
+
elif e.response.status_code == 500:
|
|
898
|
+
try:
|
|
899
|
+
preview = (e.response.text or "")[:200]
|
|
900
|
+
except Exception:
|
|
901
|
+
preview = ""
|
|
902
|
+
logger.warning(
|
|
903
|
+
f"Flash returned 500; inference service error. Retrying... body={preview}"
|
|
904
|
+
)
|
|
905
|
+
except httpx.TimeoutException as e:
|
|
906
|
+
last_error = e
|
|
907
|
+
|
|
908
|
+
if attempt < max_retries:
|
|
909
|
+
logger.warning(
|
|
910
|
+
f"Inference request failed (attempt {attempt + 1}/{max_retries + 1}), "
|
|
911
|
+
f"retrying in {wait_time}s..."
|
|
912
|
+
)
|
|
913
|
+
await asyncio.sleep(wait_time)
|
|
914
|
+
wait_time *= backoff_factor
|
|
915
|
+
|
|
916
|
+
if last_error is not None:
|
|
917
|
+
raise last_error
|
|
918
|
+
raise RuntimeError("RL inference retries exhausted with no captured exception")
|
|
919
|
+
|
|
920
|
+
|
|
921
|
+
def create_inference_client(
    task_app: Any,
    api_key: str | None = None,
) -> OpenAIClient:
    """
    Create an inference client using TaskApp configuration.

    When the ``SYNTH_FAKE_INFERENCE`` environment variable is set to a
    non-blank value, a stand-in client is returned instead. It performs no
    network I/O and always answers with one canned ``interact_many`` tool
    call.

    Args:
        task_app: TaskApp instance with vllm_base_url
        api_key: Optional API key for authentication

    Returns:
        Configured OpenAIClient instance
    """
    # Local imports to avoid module-level side effects.
    import json as _json
    import os as _os
    import time as _time
    import typing as _t

    # Fallback to environment if caller didn't provide an API key
    if api_key is None:
        try:
            api_key = _os.getenv("OPENAI_API_KEY") or getattr(task_app, "openai_api_key", None)
        except Exception:
            # Best-effort lookup: attribute access on task_app may raise.
            api_key = None

    if _os.getenv("SYNTH_FAKE_INFERENCE", "").strip():

        class _DummyClient:
            async def generate_with_retries(
                self,
                request: dict[str, Any],
                base_url: str | None = None,
                # Accepted (and ignored) so this stand-in takes the same
                # arguments, in the same order, as the real
                # OpenAIClient.generate_with_retries.
                timeout_s: float | None = None,
                max_retries: int = 0,
                backoff_factor: float = 1.0,
                extra_headers: dict[str, str] | None = None,
            ) -> dict[str, Any]:
                tool_call = {
                    "id": "call_dummy",
                    "type": "function",
                    "function": {
                        "name": "interact_many",
                        "arguments": _json.dumps({"actions": ["move_right"]}),
                    },
                }
                return {
                    "id": f"cmpl-{int(_time.time())}",
                    "object": "chat.completion",
                    "created": int(_time.time()),
                    "model": request.get("model") or "dummy-model",
                    "choices": [
                        {
                            "index": 0,
                            "message": {
                                "role": "assistant",
                                "content": "",
                                "tool_calls": [tool_call],
                            },
                            "finish_reason": "tool_calls",
                        }
                    ],
                    "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
                }

            async def check_health(
                self,
                base_url: str | None = None,
                timeout_s: float | None = None,
            ) -> dict[str, Any]:
                return {"status": "ok", "dummy": True}

        return _t.cast(OpenAIClient, _DummyClient())

    return OpenAIClient(
        base_url=task_app.vllm_base_url,
        api_key=api_key,
    )
|