synth-ai 0.2.9.dev0__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the registry.
- examples/README.md +1 -0
- examples/__init__.py +16 -0
- examples/analyze_semantic_words.sh +17 -0
- examples/baseline/banking77_baseline.py +243 -0
- examples/baseline/banking77_pipeline_baseline.py +294 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
- examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/mipro/README.md +415 -0
- examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
- examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
- examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
- examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/mipro/multi_step.md +79 -0
- examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
- examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/crafter_debug_render.py +186 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
- examples/gepa/banking77_pipeline_gepa.toml +96 -0
- examples/gepa/multi_stage_gepa_example.toml +84 -0
- examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +147 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/crafter_rl_lora.md +70 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/multi_step/sse_metrics_streaming_notes.md +357 -0
- examples/multi_step/task_app_config_notes.md +494 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_small.toml +57 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +65 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +19 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +39 -0
- examples/qwen_coder/todos.md +38 -0
- examples/qwen_coder/validate_jsonl.py +60 -0
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +169 -0
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/download_dataset.py +80 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +188 -50
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +10 -0
- examples/sdk_prompt_learning_example.py +55 -0
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +120 -0
- examples/sft/generate_traces.py +164 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +135 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +604 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +584 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1094 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1905 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +136 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +912 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/banking77_pipeline/__init__.py +6 -0
- examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
- examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/README.md +42 -0
- examples/task_apps/crafter/task_app/__init__.py +5 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +2 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +4 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +4 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +4 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +21 -0
- examples/task_apps/math/math_single_step.py +1000 -0
- examples/task_apps/math/math_task_app.py +115 -0
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README.md +356 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
- examples/task_apps/pokemon_red/task_app.py +1048 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
- examples/task_apps/sokoban/README.md +306 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +4 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +4 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +4 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +4 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/tunnel_gepa_banking77/README.md +106 -0
- examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
- examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
- examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
- examples/vlm/PROPOSAL.md +53 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +275 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +422 -0
- examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
- examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
- examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
- examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
- examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
- examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
- examples/warming_up_to_rl/export_trace_sft.py +837 -0
- examples/warming_up_to_rl/groq_test.py +97 -0
- examples/warming_up_to_rl/manage_secrets.py +131 -0
- examples/warming_up_to_rl/old/event_rewards.md +234 -0
- examples/warming_up_to_rl/old/notes.md +73 -0
- examples/warming_up_to_rl/readme.md +110 -0
- examples/warming_up_to_rl/run_eval.py +736 -0
- examples/warming_up_to_rl/run_fft_and_save.py +380 -0
- examples/warming_up_to_rl/run_local_rollout.py +239 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
- examples/warming_up_to_rl/run_rl_and_save.py +124 -0
- examples/warming_up_to_rl/run_rollout_remote.py +156 -0
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
- examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
- examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- examples/workflows/math_rl/run_eval.py +436 -0
- examples/workflows/math_rl/run_rl_and_save.py +111 -0
- synth_ai/__init__.py +47 -23
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +514 -0
- synth_ai/api/train/__init__.py +60 -2
- synth_ai/api/train/builders.py +347 -39
- synth_ai/api/train/cli.py +895 -160
- synth_ai/api/train/config_finder.py +103 -25
- synth_ai/api/train/configs/__init__.py +65 -0
- synth_ai/api/train/configs/prompt_learning.py +496 -0
- synth_ai/api/train/configs/rl.py +188 -0
- synth_ai/api/train/configs/sft.py +99 -0
- synth_ai/api/train/configs/shared.py +81 -0
- synth_ai/api/train/env_resolver.py +70 -20
- synth_ai/api/train/pollers.py +29 -4
- synth_ai/api/train/prompt_learning.py +425 -0
- synth_ai/api/train/sft.py +390 -0
- synth_ai/api/train/supported_algos.py +147 -0
- synth_ai/api/train/task_app.py +6 -4
- synth_ai/api/train/utils.py +64 -52
- synth_ai/api/train/validators.py +1117 -0
- synth_ai/api/tunnel.py +49 -0
- synth_ai/auth/credentials.py +94 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cfgs.py +227 -0
- synth_ai/cli/__init__.py +85 -63
- synth_ai/cli/_modal_wrapper.py +31 -0
- synth_ai/cli/_storage.py +20 -0
- synth_ai/cli/_typer_patch.py +47 -0
- synth_ai/cli/_validate_task_app.py +29 -0
- synth_ai/cli/balance.py +16 -4
- synth_ai/cli/calc.py +36 -21
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +267 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +185 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1437 -0
- synth_ai/cli/commands/status/__init__.py +66 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/session.py +183 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +32 -140
- synth_ai/cli/deploy.py +233 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +28 -22
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/mcp.py +34 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +256 -0
- synth_ai/cli/recent.py +13 -7
- synth_ai/cli/rl_demo.py +156 -116
- synth_ai/cli/root.py +131 -132
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +49 -0
- synth_ai/cli/status.py +7 -125
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +2284 -257
- synth_ai/cli/traces.py +9 -5
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +13 -18
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/core/cli.py +579 -291
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/__init__.py +3 -3
- synth_ai/demos/demo_task_apps/core.py +64 -28
- synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
- synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +185 -83
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +703 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +12 -5
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/environment.py +93 -2
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +60 -12
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +86 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +104 -12
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/__init__.py +15 -0
- synth_ai/evals/base.py +14 -5
- synth_ai/evals/client.py +82 -0
- synth_ai/evals/types.py +42 -0
- synth_ai/http.py +8 -22
- synth_ai/http_client.py +45 -12
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +21 -7
- synth_ai/jobs/client.py +129 -80
- synth_ai/judge_schemas.py +127 -0
- synth_ai/learning/__init__.py +51 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +122 -30
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +14 -8
- synth_ai/learning/jobs.py +43 -47
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +185 -0
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +269 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +698 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +29 -25
- synth_ai/mcp/__init__.py +5 -0
- synth_ai/mcp/__main__.py +8 -0
- synth_ai/mcp/main.py +254 -0
- synth_ai/mcp/setup.py +100 -0
- synth_ai/modal.py +257 -0
- synth_ai/pricing/__init__.py +3 -0
- synth_ai/pricing/model_pricing.py +64 -0
- synth_ai/session/__init__.py +75 -0
- synth_ai/session/client.py +383 -0
- synth_ai/session/constants.py +63 -0
- synth_ai/session/exceptions.py +105 -0
- synth_ai/session/manager.py +139 -0
- synth_ai/session/models.py +89 -0
- synth_ai/session/query.py +110 -0
- synth_ai/spec/__init__.py +46 -0
- synth_ai/spec/dataclasses.py +149 -0
- synth_ai/spec/loader.py +144 -0
- synth_ai/spec/serializer.py +199 -0
- synth_ai/spec/validation.py +250 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +589 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/__init__.py +50 -30
- synth_ai/task/apps/__init__.py +63 -19
- synth_ai/task/auth.py +35 -23
- synth_ai/task/client.py +15 -13
- synth_ai/task/config.py +261 -0
- synth_ai/task/contracts.py +165 -64
- synth_ai/task/datasets.py +9 -6
- synth_ai/task/errors.py +11 -10
- synth_ai/task/health.py +17 -11
- synth_ai/task/inference_api.py +101 -0
- synth_ai/task/json.py +58 -24
- synth_ai/task/proxy.py +59 -66
- synth_ai/task/rubrics/__init__.py +55 -0
- synth_ai/task/rubrics/loaders.py +156 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +116 -0
- synth_ai/task/rubrics/strict.py +149 -0
- synth_ai/task/rubrics.py +22 -15
- synth_ai/task/server.py +65 -31
- synth_ai/task/trace_correlation_helpers.py +328 -0
- synth_ai/task/tracing_utils.py +44 -28
- synth_ai/task/validators.py +449 -6
- synth_ai/task/vendors.py +5 -7
- synth_ai/tracing_v3/__init__.py +4 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/config.py +167 -22
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +42 -29
- synth_ai/tracing_v3/decorators.py +80 -45
- synth_ai/tracing_v3/examples/basic_usage.py +15 -9
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/replica_sync.py +12 -7
- synth_ai/tracing_v3/serialization.py +130 -0
- synth_ai/tracing_v3/session_tracer.py +73 -16
- synth_ai/tracing_v3/storage/base.py +89 -1
- synth_ai/tracing_v3/storage/config.py +63 -16
- synth_ai/tracing_v3/storage/factory.py +11 -9
- synth_ai/tracing_v3/storage/utils.py +15 -11
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/__init__.py +8 -21
- synth_ai/tracing_v3/turso/daemon.py +123 -15
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1293 -0
- synth_ai/tracing_v3/utils.py +5 -4
- synth_ai/tunnel.py +143 -0
- synth_ai/tunnel_deploy.py +278 -0
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +166 -0
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/apps.py +152 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/claude.py +36 -0
- synth_ai/utils/cli.py +284 -0
- synth_ai/utils/config.py +81 -0
- synth_ai/utils/env.py +346 -0
- synth_ai/utils/errors.py +85 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/log_filter.py +99 -0
- synth_ai/utils/logging.py +198 -0
- synth_ai/utils/modal.py +299 -0
- synth_ai/utils/paths.py +95 -0
- synth_ai/utils/process.py +233 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/ssl.py +25 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/tunnel/__init__.py +12 -0
- synth_ai/utils/tunnel/config.py +55 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/uvicorn.py +77 -0
- synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
- synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
- synth_ai/cli/man.py +0 -106
- synth_ai/core/experiment.py +0 -15
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -258
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/handshake.py +0 -107
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/lm/__init__.py +0 -51
- synth_ai/lm/caching/constants.py +0 -6
- synth_ai/lm/caching/dbs.py +0 -0
- synth_ai/lm/caching/ephemeral.py +0 -102
- synth_ai/lm/caching/handler.py +0 -137
- synth_ai/lm/caching/initialize.py +0 -11
- synth_ai/lm/caching/persistent.py +0 -114
- synth_ai/lm/config.py +0 -110
- synth_ai/lm/constants.py +0 -32
- synth_ai/lm/core/__init__.py +0 -8
- synth_ai/lm/core/all.py +0 -73
- synth_ai/lm/core/exceptions.py +0 -7
- synth_ai/lm/core/main.py +0 -319
- synth_ai/lm/core/main_v3.py +0 -594
- synth_ai/lm/core/synth_models.py +0 -48
- synth_ai/lm/core/vendor_clients.py +0 -188
- synth_ai/lm/cost/monitor.py +0 -1
- synth_ai/lm/cost/statefulness.py +0 -1
- synth_ai/lm/injection.py +0 -80
- synth_ai/lm/overrides.py +0 -206
- synth_ai/lm/provider_support/__init__.py +0 -8
- synth_ai/lm/provider_support/anthropic.py +0 -972
- synth_ai/lm/provider_support/openai.py +0 -1139
- synth_ai/lm/provider_support/suppress_logging.py +0 -31
- synth_ai/lm/structured_outputs/handler.py +0 -440
- synth_ai/lm/structured_outputs/inject.py +0 -297
- synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
- synth_ai/lm/tools/__init__.py +0 -3
- synth_ai/lm/tools/base.py +0 -172
- synth_ai/lm/unified_interface.py +0 -202
- synth_ai/lm/vendors/base.py +0 -81
- synth_ai/lm/vendors/core/anthropic_api.py +0 -387
- synth_ai/lm/vendors/core/gemini_api.py +0 -292
- synth_ai/lm/vendors/core/mistral_api.py +0 -322
- synth_ai/lm/vendors/core/openai_api.py +0 -225
- synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
- synth_ai/lm/vendors/local/ollama.py +0 -0
- synth_ai/lm/vendors/openai_standard.py +0 -780
- synth_ai/lm/vendors/openai_standard_responses.py +0 -256
- synth_ai/lm/vendors/retries.py +0 -22
- synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
- synth_ai/lm/vendors/supported/deepseek.py +0 -69
- synth_ai/lm/vendors/supported/grok.py +0 -75
- synth_ai/lm/vendors/supported/groq.py +0 -16
- synth_ai/lm/vendors/supported/ollama.py +0 -15
- synth_ai/lm/vendors/supported/openrouter.py +0 -74
- synth_ai/lm/vendors/supported/together.py +0 -11
- synth_ai/lm/vendors/synth_client.py +0 -808
- synth_ai/lm/warmup.py +0 -186
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/task/apps/grpo_crafter.py +0 -438
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/manager.py +0 -774
- synth_ai/v0/tracing/abstractions.py +0 -224
- synth_ai/v0/tracing/base_client.py +0 -91
- synth_ai/v0/tracing/client_manager.py +0 -131
- synth_ai/v0/tracing/config.py +0 -142
- synth_ai/v0/tracing/context.py +0 -146
- synth_ai/v0/tracing/decorators.py +0 -682
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/v0/tracing/events/manage.py +0 -147
- synth_ai/v0/tracing/events/scope.py +0 -86
- synth_ai/v0/tracing/events/store.py +0 -228
- synth_ai/v0/tracing/immediate_client.py +0 -151
- synth_ai/v0/tracing/local.py +0 -18
- synth_ai/v0/tracing/log_client_base.py +0 -73
- synth_ai/v0/tracing/retry_queue.py +0 -186
- synth_ai/v0/tracing/trackers.py +0 -515
- synth_ai/v0/tracing/upload.py +0 -512
- synth_ai/v0/tracing/utils.py +0 -9
- synth_ai/v0/tracing_v1/__init__.py +0 -16
- synth_ai/v0/tracing_v1/abstractions.py +0 -224
- synth_ai/v0/tracing_v1/base_client.py +0 -91
- synth_ai/v0/tracing_v1/client_manager.py +0 -131
- synth_ai/v0/tracing_v1/config.py +0 -142
- synth_ai/v0/tracing_v1/context.py +0 -146
- synth_ai/v0/tracing_v1/decorators.py +0 -703
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/v0/tracing_v1/events/manage.py +0 -147
- synth_ai/v0/tracing_v1/events/scope.py +0 -86
- synth_ai/v0/tracing_v1/events/store.py +0 -228
- synth_ai/v0/tracing_v1/immediate_client.py +0 -151
- synth_ai/v0/tracing_v1/local.py +0 -18
- synth_ai/v0/tracing_v1/log_client_base.py +0 -73
- synth_ai/v0/tracing_v1/retry_queue.py +0 -186
- synth_ai/v0/tracing_v1/trackers.py +0 -515
- synth_ai/v0/tracing_v1/upload.py +0 -527
- synth_ai/v0/tracing_v1/utils.py +0 -9
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.9.dev0.dist-info/METADATA +0 -131
- synth_ai-0.2.9.dev0.dist-info/RECORD +0 -444
- {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
- {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
- {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
- {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
- {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
- {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
- {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
- {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
- /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
- /synth_ai/{learning/filtering.py → py.typed} +0 -0
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
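
Several top-level modules were relocated in this release; for example, the entries above show `synth_ai/rl/*` moving under `synth_ai/learning/rl/*`. The sketch below illustrates the corresponding import-path change. It is inferred from the renamed files only; the exact symbols exported by those modules are not shown in this diff, so the module-level imports are assumptions.

```python
# Illustrative sketch only - inferred from the file renames listed above
# (synth_ai/rl/* -> synth_ai/learning/rl/*); module names come from the
# listing, but their exported contents are not shown in this diff.

# 0.2.9.dev0 layout:
#   from synth_ai.rl import contracts, env_keys

# 0.2.23.dev3 layout:
from synth_ai.learning.rl import contracts, env_keys
```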
|
@@ -1,780 +0,0 @@
|
|
|
1
|
-
import asyncio
|
|
2
|
-
import os
|
|
3
|
-
import time
|
|
4
|
-
from typing import Any
|
|
5
|
-
|
|
6
|
-
import backoff
|
|
7
|
-
import groq
|
|
8
|
-
import openai
|
|
9
|
-
import pydantic_core
|
|
10
|
-
from pydantic import BaseModel
|
|
11
|
-
|
|
12
|
-
from synth_ai.lm.caching.initialize import (
|
|
13
|
-
get_cache_handler,
|
|
14
|
-
)
|
|
15
|
-
from synth_ai.lm.constants import SPECIAL_BASE_TEMPS
|
|
16
|
-
from synth_ai.lm.injection import apply_injection
|
|
17
|
-
from synth_ai.lm.overrides import (
|
|
18
|
-
apply_param_overrides,
|
|
19
|
-
apply_tool_overrides,
|
|
20
|
-
use_overrides_for_messages,
|
|
21
|
-
)
|
|
22
|
-
from synth_ai.lm.tools.base import BaseTool
|
|
23
|
-
from synth_ai.lm.vendors.base import BaseLMResponse, VendorBase
|
|
24
|
-
from synth_ai.lm.vendors.openai_standard_responses import OpenAIResponsesAPIMixin
|
|
25
|
-
from synth_ai.lm.vendors.retries import MAX_BACKOFF
|
|
26
|
-
|
|
27
|
-
DEFAULT_EXCEPTIONS_TO_RETRY = (
|
|
28
|
-
pydantic_core._pydantic_core.ValidationError,
|
|
29
|
-
openai.APIConnectionError,
|
|
30
|
-
openai.APITimeoutError,
|
|
31
|
-
groq.InternalServerError,
|
|
32
|
-
groq.APITimeoutError,
|
|
33
|
-
groq.APIConnectionError,
|
|
34
|
-
)
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
def special_orion_transform(model: str, messages: list[dict[str, Any]]) -> list[dict[str, Any]]:
|
|
38
|
-
"""
|
|
39
|
-
Transform messages for O1 series models which don't support system messages.
|
|
40
|
-
|
|
41
|
-
Args:
|
|
42
|
-
model: Model name to check
|
|
43
|
-
messages: Original messages list
|
|
44
|
-
|
|
45
|
-
Returns:
|
|
46
|
-
Transformed messages list with system content merged into user message
|
|
47
|
-
"""
|
|
48
|
-
if "o1-" in model:
|
|
49
|
-
messages = [
|
|
50
|
-
{
|
|
51
|
-
"role": "user",
|
|
52
|
-
"content": f"<instructions>{messages[0]['content']}</instructions><information>{messages[1]}</information>",
|
|
53
|
-
}
|
|
54
|
-
]
|
|
55
|
-
return messages
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
def _silent_backoff_handler(_details):
|
|
59
|
-
"""No-op handler to keep stdout clean while still allowing visibility via logging if desired."""
|
|
60
|
-
pass
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
class OpenAIStandard(VendorBase, OpenAIResponsesAPIMixin):
|
|
64
|
-
"""
|
|
65
|
-
Standard OpenAI-compatible vendor implementation.
|
|
66
|
-
|
|
67
|
-
This class provides a standard implementation for OpenAI-compatible APIs,
|
|
68
|
-
including proper retry logic, caching, and support for various model features.
|
|
69
|
-
|
|
70
|
-
Attributes:
|
|
71
|
-
used_for_structured_outputs: Whether this client supports structured outputs
|
|
72
|
-
exceptions_to_retry: List of exceptions that trigger automatic retries
|
|
73
|
-
sync_client: Synchronous API client
|
|
74
|
-
async_client: Asynchronous API client
|
|
75
|
-
"""
|
|
76
|
-
|
|
77
|
-
used_for_structured_outputs: bool = True
|
|
78
|
-
exceptions_to_retry: list = DEFAULT_EXCEPTIONS_TO_RETRY
|
|
79
|
-
sync_client: Any
|
|
80
|
-
async_client: Any
|
|
81
|
-
|
|
82
|
-
def __init__(
|
|
83
|
-
self,
|
|
84
|
-
sync_client: Any,
|
|
85
|
-
async_client: Any,
|
|
86
|
-
exceptions_to_retry: list[Exception] = DEFAULT_EXCEPTIONS_TO_RETRY,
|
|
87
|
-
used_for_structured_outputs: bool = False,
|
|
88
|
-
):
|
|
89
|
-
self.sync_client = sync_client
|
|
90
|
-
self.async_client = async_client
|
|
91
|
-
self.used_for_structured_outputs = used_for_structured_outputs
|
|
92
|
-
self.exceptions_to_retry = exceptions_to_retry
|
|
93
|
-
|
|
94
|
-
# Initialize Harmony support for OSS models
|
|
95
|
-
self.harmony_available = False
|
|
96
|
-
self.harmony_enc = None
|
|
97
|
-
try:
|
|
98
|
-
from openai_harmony import HarmonyEncodingName, load_harmony_encoding
|
|
99
|
-
|
|
100
|
-
self.harmony_available = True
|
|
101
|
-
self.harmony_enc = load_harmony_encoding(HarmonyEncodingName.HARMONY_GPT_OSS)
|
|
102
|
-
except ImportError:
|
|
103
|
-
pass
|
|
104
|
-
|
|
105
|
-
    @backoff.on_exception(
        backoff.expo,
        DEFAULT_EXCEPTIONS_TO_RETRY,
        max_time=MAX_BACKOFF,
        jitter=backoff.full_jitter,
        on_backoff=_silent_backoff_handler,
    )
    async def _hit_api_async(
        self,
        model: str,
        messages: list[dict[str, Any]],
        lm_config: dict[str, Any],
        use_ephemeral_cache_only: bool = False,
        reasoning_effort: str = "high",
        tools: list[BaseTool] | None = None,
    ) -> BaseLMResponse:
        assert lm_config.get("response_model") is None, (
            "response_model is not supported for standard calls"
        )

        debug = os.getenv("SYNTH_OPENAI_DEBUG") == "1"
        if debug:
            print("🔍 OPENAI DEBUG: _hit_api_async called with:")
            print(f" Model: {model}")
            print(f" Messages: {len(messages)} messages")
            print(f" Tools: {len(tools) if tools else 0} tools")
            print(f" LM config: {lm_config}")

        messages = special_orion_transform(model, messages)
        # Apply context-scoped overrides and prompt injection just before building API params
        with use_overrides_for_messages(messages):
            messages = apply_injection(messages)
        used_cache_handler = get_cache_handler(use_ephemeral_cache_only)
        lm_config["reasoning_effort"] = reasoning_effort
        cache_result = used_cache_handler.hit_managed_cache(
            model, messages, lm_config=lm_config, tools=tools
        )
        if cache_result and debug:
            print("🔍 OPENAI DEBUG: Cache hit! Returning cached result")
            print(f" Cache result type: {type(cache_result)}")
            print("🔍 OPENAI DEBUG: DISABLING CACHE FOR DEBUGGING - forcing API call")
            # return cache_result  # Commented out intentionally when debug is on

        if debug:
            print("🔍 OPENAI DEBUG: Cache miss, making actual API call")

        # Common API call params
        api_params = {
            "model": model,
            "messages": messages,
        }
        with use_overrides_for_messages(messages):
            api_params = apply_param_overrides(api_params)

        # Add tools if provided
        if tools and all(isinstance(tool, BaseTool) for tool in tools):
            api_params["tools"] = [tool.to_openai_tool() for tool in tools]
        elif tools:
            api_params["tools"] = tools

        # Only add temperature for non o1/o3 models, and do not override if already set via overrides
        if (
            not any(prefix in model for prefix in ["o1-", "o3-"])
            and "temperature" not in api_params
        ):
            api_params["temperature"] = lm_config.get(
                "temperature", SPECIAL_BASE_TEMPS.get(model, 0)
            )

        # Forward additional sampling / control params if provided
        if lm_config.get("max_tokens") is not None:
            api_params["max_tokens"] = lm_config["max_tokens"]
        if lm_config.get("top_p") is not None:
            api_params["top_p"] = lm_config["top_p"]
        if lm_config.get("frequency_penalty") is not None:
            api_params["frequency_penalty"] = lm_config["frequency_penalty"]
        if lm_config.get("presence_penalty") is not None:
            api_params["presence_penalty"] = lm_config["presence_penalty"]
        if lm_config.get("stop") is not None:
            api_params["stop"] = lm_config["stop"]
        if lm_config.get("tool_choice") is not None:
            api_params["tool_choice"] = lm_config["tool_choice"]
        # Forward GPU preference to backend (body + header)
        if lm_config.get("gpu_preference") is not None:
            api_params["gpu_preference"] = lm_config["gpu_preference"]
            # Also set header so proxies that read headers can honor it
            hdrs = api_params.get("extra_headers", {})
            hdrs["X-GPU-Preference"] = lm_config["gpu_preference"]
            api_params["extra_headers"] = hdrs
        # Also mirror stop_after_tool_calls into a header for robustness
        try:
            satc_val = None
            if isinstance(lm_config.get("extra_body"), dict):
                satc_val = lm_config["extra_body"].get("stop_after_tool_calls")
            if satc_val is not None:
                hdrs = api_params.get("extra_headers", {})
                hdrs["X-Stop-After-Tool-Calls"] = str(satc_val)
                api_params["extra_headers"] = hdrs
        except Exception:
            pass
        # Apply overrides (tools and params) from context after building baseline params
        with use_overrides_for_messages(messages):
            api_params = apply_tool_overrides(api_params)
            api_params = apply_param_overrides(api_params)

        # Thinking controls: route via extra_body.chat_template_kwargs for compatibility
        thinking_mode_val = lm_config.get("thinking_mode")
        thinking_budget_val = lm_config.get("thinking_budget")
        if thinking_mode_val is not None or thinking_budget_val is not None:
            api_params["extra_body"] = api_params.get("extra_body", {})
            ctk = api_params["extra_body"].get("chat_template_kwargs", {})
            if thinking_mode_val is not None:
                ctk["thinking_mode"] = thinking_mode_val
            if thinking_budget_val is not None:
                try:
                    ctk["thinking_budget"] = int(thinking_budget_val)
                except Exception:
                    ctk["thinking_budget"] = thinking_budget_val
            api_params["extra_body"]["chat_template_kwargs"] = ctk

        # Backward-compatible: forward legacy enable_thinking only via extra_body for callers still using it
        if lm_config.get("enable_thinking") is not None:
            api_params["extra_body"] = api_params.get("extra_body", {})
            ctk = api_params["extra_body"].get("chat_template_kwargs", {})
            ctk["enable_thinking"] = lm_config["enable_thinking"]
            api_params["extra_body"]["chat_template_kwargs"] = ctk
        # Forward arbitrary extra_body from lm_config if provided (merge)
        if lm_config.get("extra_body") is not None:
            # Shallow-merge top-level keys; nested keys (like chat_template_kwargs) should be provided whole
            api_params["extra_body"] = {
                **api_params.get("extra_body", {}),
                **(lm_config.get("extra_body") or {}),
            }
            # Ensure legacy extra_body flag remains merged (do not override top-level fields)
            if lm_config.get("enable_thinking") is not None:
                api_params["extra_body"] = api_params.get("extra_body", {})
                ctk = api_params["extra_body"].get("chat_template_kwargs", {})
                ctk["enable_thinking"] = lm_config["enable_thinking"]
                api_params["extra_body"]["chat_template_kwargs"] = ctk

        # Add reasoning_effort only for o3-mini
        if model in ["o3-mini"]:
            print("Reasoning effort:", reasoning_effort)
            api_params["reasoning_effort"] = reasoning_effort

        # Filter Synth-only params when calling external OpenAI-compatible providers.
        # External providers (e.g., OpenAI, Groq) reject unknown fields like
        # extra_body.chat_template_kwargs or stop_after_tool_calls.
        try:
            base_url_obj = getattr(self.async_client, "base_url", None)
            base_url_str = str(base_url_obj) if base_url_obj is not None else ""
        except Exception:
            base_url_str = ""

        is_external_provider = "openai.com" in base_url_str or "api.groq.com" in base_url_str

        if is_external_provider:
            # Remove extra_body entirely; this is Synth-specific plumbing
            if "extra_body" in api_params:
                api_params.pop("extra_body", None)

            # Also ensure we don't pass stray vendor-specific fields if present
            # (defensive in case upstream added them at top-level later)
            for k in ["chat_template_kwargs", "stop_after_tool_calls"]:
                api_params.pop(k, None)

        # GPT-5 models: parameter normalization
        if model.startswith("gpt-5"):
            # Require max_completion_tokens instead of max_tokens
            if "max_tokens" in api_params:
                api_params["max_completion_tokens"] = api_params.pop("max_tokens")
            # Only default temperature=1 supported; omit custom temperature
            if "temperature" in api_params:
                api_params.pop("temperature", None)

        # Call API with better auth error reporting
        # try:
        if debug:
            print("🔍 OPENAI DEBUG: Making request with params:")
            print(f" Model: {api_params.get('model')}")
            print(f" Messages: {len(api_params.get('messages', []))} messages")
            print(f" Tools: {len(api_params.get('tools', []))} tools")
            print(f" Max tokens: {api_params.get('max_tokens', 'NOT SET')}")
            print(f" Temperature: {api_params.get('temperature', 'NOT SET')}")
            if "tools" in api_params:
                print(f" First tool: {api_params['tools'][0]}")
            print(f" FULL API PARAMS: {api_params}")

        # Quiet targeted retry for OpenAI 400 tool_use_failed during tool-calling
        try:
            max_attempts_for_tool_use = int(os.getenv("SYNTH_TOOL_USE_RETRIES", "5"))
        except Exception:
            max_attempts_for_tool_use = 5
        try:
            backoff_seconds = float(os.getenv("SYNTH_TOOL_USE_BACKOFF_INITIAL", "0.5"))
        except Exception:
            backoff_seconds = 0.5

        attempt_index = 0
        while True:
            try:
                output = await self.async_client.chat.completions.create(**api_params)
                break
            except openai.BadRequestError as err:
                # Detect tool-use failure from various SDK surfaces
                should_retry = False
                # 1) Body dict
                body = getattr(err, "body", None)
                if isinstance(body, dict):
                    try:
                        err_obj = body.get("error") if isinstance(body.get("error"), dict) else {}
                        code_val = err_obj.get("code")
                        msg_val = err_obj.get("message")
                        if code_val == "tool_use_failed" or (
                            isinstance(msg_val, str) and "Failed to call a function" in msg_val
                        ):
                            should_retry = True
                    except Exception:
                        pass
                # 2) Response JSON
                if not should_retry:
                    try:
                        resp = getattr(err, "response", None)
                        if resp is not None:
                            j = resp.json()
                            if isinstance(j, dict):
                                err_obj = j.get("error") if isinstance(j.get("error"), dict) else {}
                                code_val = err_obj.get("code")
                                msg_val = err_obj.get("message")
                                if code_val == "tool_use_failed" or (
                                    isinstance(msg_val, str)
                                    and "Failed to call a function" in msg_val
                                ):
                                    should_retry = True
                    except Exception:
                        pass
                # 3) Fallback to string match
                if not should_retry:
                    err_text = str(err)
                    if "tool_use_failed" in err_text or "Failed to call a function" in err_text:
                        should_retry = True
                if should_retry and attempt_index + 1 < max_attempts_for_tool_use:
                    await asyncio.sleep(backoff_seconds)
                    backoff_seconds = min(backoff_seconds * 2.0, 2.0)
                    attempt_index += 1
                    continue
                raise

        if debug:
            print("🔍 OPENAI DEBUG: Response received:")
            print(f" Type: {type(output)}")
            print(f" Choices: {len(output.choices) if hasattr(output, 'choices') else 'N/A'}")
            if hasattr(output, "choices") and output.choices:
                choice = output.choices[0]
                print(f" Choice type: {type(choice)}")
                if hasattr(choice, "message"):
                    message = choice.message
                    print(f" Message type: {type(message)}")
                    print(f" Has tool_calls: {hasattr(message, 'tool_calls')}")
                    if hasattr(message, "tool_calls"):
                        print(f" Tool calls: {message.tool_calls}")
                    print(
                        f" Content: {message.content[:200] if hasattr(message, 'content') and message.content else 'None'}..."
                    )
                # Show finish_reason and usage if available
                try:
                    print(f" finish_reason: {getattr(choice, 'finish_reason', None)}")
                    usage = getattr(output, "usage", None)
                    if usage:
                        print(
                            f" usage: prompt_tokens={getattr(usage, 'prompt_tokens', None)}, completion_tokens={getattr(usage, 'completion_tokens', None)}, total_tokens={getattr(usage, 'total_tokens', None)}"
                        )
                except Exception:
                    pass

        if debug:
            print("🔍 OPENAI DEBUG: FULL RAW RESPONSE:")
            if hasattr(output.choices[0].message, "content") and output.choices[0].message.content:
                print(f" FULL CONTENT:\n{output.choices[0].message.content}")
            print(f" Raw choice: {choice}")
            print(f" Raw message: {message}")
        # except Exception as e:
        #     try:
        #         from openai import AuthenticationError as _OpenAIAuthErr  # type: ignore
        #     except ModuleNotFoundError:
        #         _OpenAIAuthErr = type(e)
        #     if isinstance(e, _OpenAIAuthErr):
        #         key_preview = (os.getenv("OPENAI_API_KEY") or "")[:8]
        #         # Create a more informative error message but preserve the original exception
        #         enhanced_msg = f"Invalid API key format. Expected prefix 'sk-' or 'sk_live_'. Provided key begins with '{key_preview}'. Original error: {str(e)}"
        #         # Re-raise the original exception with enhanced message if possible
        #         if hasattr(e, 'response') and hasattr(e, 'body'):
        #             raise _OpenAIAuthErr(enhanced_msg, response=e.response, body=e.body) from None
        #         else:
        #             # Fallback: just re-raise the original with a print for debugging
        #             print(f"🔑 API Key Debug: {enhanced_msg}")
        #             raise e from None
        #     raise
        message = output.choices[0].message

        # Convert tool calls to dict format, preferring dict-shaped entries first
        tool_calls = None
        if message.tool_calls:
            converted: list[dict] = []
            for tc in message.tool_calls:
                if isinstance(tc, dict):
                    fn = tc.get("function") or {}
                    converted.append(
                        {
                            "id": tc.get("id"),
                            "type": tc.get("type", "function"),
                            "function": {
                                "name": fn.get("name") or tc.get("name"),
                                "arguments": fn.get("arguments") or tc.get("arguments"),
                            },
                        }
                    )
                else:
                    # SDK object path
                    converted.append(
                        {
                            "id": getattr(tc, "id", None),
                            "type": getattr(tc, "type", "function"),
                            "function": {
                                "name": getattr(getattr(tc, "function", None), "name", None),
                                "arguments": getattr(getattr(tc, "function", None), "arguments", None),
                            },
                        }
                    )
            tool_calls = converted or None

        # Attach basic usage if available
        usage_dict = None
        try:
            usage_obj = getattr(output, "usage", None)
            if usage_obj is not None:
                usage_dict = {
                    "prompt_tokens": getattr(usage_obj, "prompt_tokens", None),
                    "completion_tokens": getattr(usage_obj, "completion_tokens", None),
                    "total_tokens": getattr(usage_obj, "total_tokens", None),
                }
        except Exception:
            usage_dict = None

        lm_response = BaseLMResponse(
            raw_response=message.content or "",  # Use empty string if no content
            structured_output=None,
            tool_calls=tool_calls,
            usage=usage_dict,
        )
        lm_config["reasoning_effort"] = reasoning_effort
        used_cache_handler.add_to_managed_cache(
            model, messages, lm_config=lm_config, output=lm_response, tools=tools
        )
        return lm_response

    @backoff.on_exception(
        backoff.expo,
        DEFAULT_EXCEPTIONS_TO_RETRY,
        max_time=MAX_BACKOFF,
        jitter=backoff.full_jitter,
        on_backoff=_silent_backoff_handler,
    )
    def _hit_api_sync(
        self,
        model: str,
        messages: list[dict[str, Any]],
        lm_config: dict[str, Any],
        use_ephemeral_cache_only: bool = False,
        reasoning_effort: str = "high",
        tools: list[BaseTool] | None = None,
    ) -> BaseLMResponse:
        assert lm_config.get("response_model") is None, (
            "response_model is not supported for standard calls"
        )
        messages = special_orion_transform(model, messages)
        with use_overrides_for_messages(messages):
            # Apply context-scoped prompt injection just before building API params
            messages = apply_injection(messages)
        used_cache_handler = get_cache_handler(use_ephemeral_cache_only=use_ephemeral_cache_only)
        lm_config["reasoning_effort"] = reasoning_effort
        cache_result = used_cache_handler.hit_managed_cache(
            model, messages, lm_config=lm_config, tools=tools
        )
        # During pytest runs, bypass returning cache to allow tests to inspect outgoing params
        in_pytest = os.getenv("PYTEST_CURRENT_TEST") is not None
        if cache_result and not in_pytest:
            return cache_result

        # Common API call params
        api_params = {
            "model": model,
            "messages": messages,
        }
        with use_overrides_for_messages(messages):
            api_params = apply_param_overrides(api_params)

        # Add tools if provided
        if tools and all(isinstance(tool, BaseTool) for tool in tools):
            api_params["tools"] = [tool.to_openai_tool() for tool in tools]
        elif tools:
            api_params["tools"] = tools

        # Apply overrides (tools and params) using module-level imports
        with use_overrides_for_messages(messages):
            api_params = apply_tool_overrides(api_params)
            api_params = apply_param_overrides(api_params)

        # Only add temperature for non o1/o3 models, and do not override if already set via overrides
        if (
            not any(prefix in model for prefix in ["o1-", "o3-"])
            and "temperature" not in api_params
        ):
            api_params["temperature"] = lm_config.get(
                "temperature", SPECIAL_BASE_TEMPS.get(model, 0)
            )

        # Forward additional sampling / control params if provided
        if lm_config.get("max_tokens") is not None:
            api_params["max_tokens"] = lm_config["max_tokens"]
        if lm_config.get("top_p") is not None:
            api_params["top_p"] = lm_config["top_p"]
        if lm_config.get("frequency_penalty") is not None:
            api_params["frequency_penalty"] = lm_config["frequency_penalty"]
        if lm_config.get("presence_penalty") is not None:
            api_params["presence_penalty"] = lm_config["presence_penalty"]
        if lm_config.get("stop") is not None:
            api_params["stop"] = lm_config["stop"]
        if lm_config.get("tool_choice") is not None:
            api_params["tool_choice"] = lm_config["tool_choice"]

        # Add reasoning_effort only for o3-mini
        if model in ["o3-mini"]:
            api_params["reasoning_effort"] = reasoning_effort

        # Sync path: apply the same targeted retry
        try:
            max_attempts_for_tool_use = int(os.getenv("SYNTH_TOOL_USE_RETRIES", "5"))
        except Exception:
            max_attempts_for_tool_use = 5
        try:
            backoff_seconds = float(os.getenv("SYNTH_TOOL_USE_BACKOFF_INITIAL", "0.5"))
        except Exception:
            backoff_seconds = 0.5

        attempt_index = 0
        while True:
            try:
                output = self.sync_client.chat.completions.create(**api_params)
                break
            except openai.BadRequestError as err:
                should_retry = False
                body = getattr(err, "body", None)
                if isinstance(body, dict):
                    try:
                        err_obj = body.get("error") if isinstance(body.get("error"), dict) else {}
                        code_val = err_obj.get("code")
                        msg_val = err_obj.get("message")
                        if code_val == "tool_use_failed" or (
                            isinstance(msg_val, str) and "Failed to call a function" in msg_val
                        ):
                            should_retry = True
                    except Exception:
                        pass
                if not should_retry:
                    try:
                        resp = getattr(err, "response", None)
                        if resp is not None:
                            j = resp.json()
                            if isinstance(j, dict):
                                err_obj = j.get("error") if isinstance(j.get("error"), dict) else {}
                                code_val = err_obj.get("code")
                                msg_val = err_obj.get("message")
                                if code_val == "tool_use_failed" or (
                                    isinstance(msg_val, str)
                                    and "Failed to call a function" in msg_val
                                ):
                                    should_retry = True
                    except Exception:
                        pass
                if not should_retry:
                    err_text = str(err)
                    if "tool_use_failed" in err_text or "Failed to call a function" in err_text:
                        should_retry = True
                if should_retry and attempt_index + 1 < max_attempts_for_tool_use:
                    time.sleep(backoff_seconds)
                    backoff_seconds = min(backoff_seconds * 2.0, 2.0)
                    attempt_index += 1
                    continue
                raise
        message = output.choices[0].message
        debug_sync = os.getenv("SYNTH_OPENAI_DEBUG") == "1"
        if debug_sync:
            try:
                print(
                    f"🔍 OPENAI DEBUG (sync): finish_reason={getattr(output.choices[0], 'finish_reason', None)}"
                )
                usage = getattr(output, "usage", None)
                if usage:
                    print(
                        f"🔍 OPENAI DEBUG (sync): usage prompt_tokens={getattr(usage, 'prompt_tokens', None)}, completion_tokens={getattr(usage, 'completion_tokens', None)}, total_tokens={getattr(usage, 'total_tokens', None)}"
                    )
            except Exception:
                pass

        # Convert tool calls to dict format
        tool_calls = None
        if message.tool_calls:
            tool_calls = [
                {
                    "id": tc.id,
                    "type": tc.type,
                    "function": {
                        "name": tc.function.name,
                        "arguments": tc.function.arguments,
                    },
                }
                for tc in message.tool_calls
            ]

        # Attach basic usage if available
        usage_dict = None
        try:
            usage_obj = getattr(output, "usage", None)
            if usage_obj is not None:
                usage_dict = {
                    "prompt_tokens": getattr(usage_obj, "prompt_tokens", None),
                    "completion_tokens": getattr(usage_obj, "completion_tokens", None),
                    "total_tokens": getattr(usage_obj, "total_tokens", None),
                }
        except Exception:
            usage_dict = None

        lm_response = BaseLMResponse(
            raw_response=message.content or "",  # Use empty string if no content
            structured_output=None,
            tool_calls=tool_calls,
            usage=usage_dict,
        )
        lm_config["reasoning_effort"] = reasoning_effort
        used_cache_handler.add_to_managed_cache(
            model, messages, lm_config=lm_config, output=lm_response, tools=tools
        )
        return lm_response

    async def _hit_api_async_structured_output(
        self,
        model: str,
        messages: list[dict[str, Any]],
        response_model: BaseModel,
        temperature: float,
        use_ephemeral_cache_only: bool = False,
        reasoning_effort: str = "high",
        tools: list[BaseTool] | None = None,
    ) -> BaseLMResponse:
        lm_config = {
            "temperature": temperature,
            "response_model": response_model,
            "reasoning_effort": reasoning_effort,
        }
        used_cache_handler = get_cache_handler(use_ephemeral_cache_only)
        cache_result: BaseLMResponse | None = used_cache_handler.hit_managed_cache(
            model, messages, lm_config=lm_config, tools=tools
        )
        if cache_result is not None:
            return cache_result

        # Common API call params
        api_params = {
            "model": model,
            "messages": messages,
        }

        # Add tools if provided
        if tools and all(isinstance(tool, BaseTool) for tool in tools):
            api_params["tools"] = [tool.to_openai_tool() for tool in tools]
        elif tools:
            api_params["tools"] = tools

        # Only add temperature for non o1/o3 models
        if not any(prefix in model for prefix in ["o1-", "o3-"]):
            api_params["temperature"] = lm_config.get(
                "temperature", SPECIAL_BASE_TEMPS.get(model, 0)
            )

        # Add reasoning_effort only for o3-mini
        if model in ["o3-mini"]:
            api_params["reasoning_effort"] = reasoning_effort

        output = await self.async_client.chat.completions.create(**api_params)

        structured_output_api_result = response_model(**output.choices[0].message.content)
        tool_calls = output.choices[0].message.tool_calls
        lm_response = BaseLMResponse(
            raw_response=output.choices[0].message.content,
            structured_output=structured_output_api_result,
            tool_calls=tool_calls,
        )
        lm_config["reasoning_effort"] = reasoning_effort
        used_cache_handler.add_to_managed_cache(
            model, messages, lm_config=lm_config, output=lm_response, tools=tools
        )
        return lm_response

    def _hit_api_sync_structured_output(
        self,
        model: str,
        messages: list[dict[str, Any]],
        response_model: BaseModel,
        temperature: float,
        use_ephemeral_cache_only: bool = False,
        reasoning_effort: str = "high",
        tools: list[BaseTool] | None = None,
    ) -> BaseLMResponse:
        lm_config = {
            "temperature": temperature,
            "response_model": response_model,
            "reasoning_effort": reasoning_effort,
        }
        used_cache_handler = get_cache_handler(use_ephemeral_cache_only)
        cache_result: BaseLMResponse | None = used_cache_handler.hit_managed_cache(
            model, messages, lm_config=lm_config, tools=tools
        )
        if cache_result is not None:
            return cache_result

        # Common API call params
        api_params = {
            "model": model,
            "messages": messages,
        }

        # Add tools if provided
        if tools and all(isinstance(tool, BaseTool) for tool in tools):
            api_params["tools"] = [tool.to_openai_tool() for tool in tools]
        elif tools:
            api_params["tools"] = tools

        # Only add temperature for non o1/o3 models
        if not any(prefix in model for prefix in ["o1-", "o3-"]):
            api_params["temperature"] = lm_config.get(
                "temperature", SPECIAL_BASE_TEMPS.get(model, 0)
            )

        # Add reasoning_effort only for o3-mini
        if model in ["o3-mini"]:
            api_params["reasoning_effort"] = reasoning_effort

        # Normalize for external OpenAI as well in sync path
        try:
            base_url_obj = getattr(self.sync_client, "base_url", None)
            base_url_str_sync = str(base_url_obj) if base_url_obj is not None else ""
        except Exception:
            base_url_str_sync = ""
        if (
            "openai.com" in base_url_str_sync or "api.groq.com" in base_url_str_sync
        ) and model.startswith("gpt-5"):
            if "max_tokens" in api_params:
                api_params["max_completion_tokens"] = api_params.pop("max_tokens")
            if "temperature" in api_params:
                api_params.pop("temperature", None)

        output = self.sync_client.chat.completions.create(**api_params)

        structured_output_api_result = response_model(**output.choices[0].message.content)
        tool_calls = output.choices[0].message.tool_calls
        lm_response = BaseLMResponse(
            raw_response=output.choices[0].message.content,
            structured_output=structured_output_api_result,
            tool_calls=tool_calls,
        )
        lm_config["reasoning_effort"] = reasoning_effort
        used_cache_handler.add_to_managed_cache(
            model, messages, lm_config=lm_config, output=lm_response, tools=tools
        )
        return lm_response
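For orientation, a minimal usage sketch of the `_hit_api_async` path removed above. The wrapper class name (`OpenAIStandard`), the model id, and the specific `lm_config` values are illustrative assumptions and are not part of this diff; the sketch only shows how `thinking_mode`, `thinking_budget`, and `gpu_preference` would be forwarded by the code above into `extra_body.chat_template_kwargs` and the `X-GPU-Preference` header.

    # Hypothetical caller sketch; class name and config values are assumed for illustration.
    import asyncio

    async def demo() -> None:
        client = OpenAIStandard()  # assumed vendor wrapper exposing _hit_api_async
        response = await client._hit_api_async(
            model="gpt-4o-mini",
            messages=[{"role": "user", "content": "Classify this banking query."}],
            lm_config={
                "temperature": 0.2,
                "max_tokens": 128,
                "thinking_mode": "off",   # routed into extra_body.chat_template_kwargs
                "thinking_budget": 256,   # coerced to int when possible
                "gpu_preference": "h100",  # mirrored into the X-GPU-Preference header
            },
        )
        print(response.raw_response, response.usage)

    asyncio.run(demo())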