synth-ai 0.2.8.dev4__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (889) hide show
  1. examples/README.md +1 -0
  2. examples/__init__.py +16 -0
  3. examples/analyze_semantic_words.sh +17 -0
  4. examples/baseline/banking77_baseline.py +243 -0
  5. examples/baseline/banking77_pipeline_baseline.py +294 -0
  6. examples/baseline/crafter_baseline.py +407 -0
  7. examples/baseline/pokemon_red_baseline.py +326 -0
  8. examples/baseline/simple_baseline.py +56 -0
  9. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  10. examples/blog_posts/gepa/README.md +355 -0
  11. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  12. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
  13. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
  14. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
  15. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
  16. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
  17. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
  18. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
  19. examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
  20. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
  21. examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
  22. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
  23. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
  24. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
  25. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
  26. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
  27. examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
  28. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  29. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  30. examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
  31. examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
  32. examples/blog_posts/gepa/task_apps.py +105 -0
  33. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  34. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  35. examples/blog_posts/mipro/README.md +415 -0
  36. examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
  37. examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
  38. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
  39. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
  40. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
  41. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
  42. examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
  43. examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
  44. examples/blog_posts/mipro/multi_step.md +79 -0
  45. examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
  46. examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
  47. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
  48. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
  49. examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
  50. examples/blog_posts/pokemon_vl/README.md +98 -0
  51. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  52. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  53. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  54. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  55. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  56. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  57. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  58. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  59. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  60. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  61. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  62. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  63. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  64. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  65. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  66. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  67. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  68. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  69. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  70. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  71. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  72. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  73. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  74. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  75. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  76. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  77. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  78. examples/crafter_debug_render.py +186 -0
  79. examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
  80. examples/gepa/banking77_pipeline_gepa.toml +96 -0
  81. examples/gepa/multi_stage_gepa_example.toml +84 -0
  82. examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
  83. examples/multi_step/SFT_README.md +147 -0
  84. examples/multi_step/configs/README_verilog_rl.md +77 -0
  85. examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
  86. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
  87. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  88. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  89. examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
  90. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
  91. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
  92. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
  93. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  94. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  95. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  96. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  97. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  98. examples/multi_step/configs/verilog_rl_lora.toml +147 -0
  99. examples/multi_step/convert_traces_to_sft.py +84 -0
  100. examples/multi_step/crafter_rl_lora.md +70 -0
  101. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  102. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  103. examples/multi_step/readme.md +48 -0
  104. examples/multi_step/run_sft_qwen30b.sh +45 -0
  105. examples/multi_step/sse_metrics_streaming_notes.md +357 -0
  106. examples/multi_step/task_app_config_notes.md +494 -0
  107. examples/multi_step/verilog_rl_lora.md +218 -0
  108. examples/qwen_coder/README.md +102 -0
  109. examples/qwen_coder/_shared.py +113 -0
  110. examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
  111. examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
  112. examples/qwen_coder/configs/coder_lora_small.toml +57 -0
  113. examples/qwen_coder/generate_dataset.py +98 -0
  114. examples/qwen_coder/infer_ft_smoke.py +65 -0
  115. examples/qwen_coder/infer_prod_proxy.py +73 -0
  116. examples/qwen_coder/infer_via_synth.py +87 -0
  117. examples/qwen_coder/scripts/infer_coder.sh +19 -0
  118. examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
  119. examples/qwen_coder/sft_full_17b.py +103 -0
  120. examples/qwen_coder/sft_lora_30b.py +110 -0
  121. examples/qwen_coder/subset_jsonl.py +39 -0
  122. examples/qwen_coder/todos.md +38 -0
  123. examples/qwen_coder/validate_jsonl.py +60 -0
  124. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  125. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  126. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  127. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  128. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  129. examples/qwen_vl/QUICKSTART.md +327 -0
  130. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  131. examples/qwen_vl/README.md +152 -0
  132. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  133. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  134. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  135. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  136. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  137. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  138. examples/qwen_vl/__init__.py +2 -0
  139. examples/qwen_vl/collect_data_via_cli.md +415 -0
  140. examples/qwen_vl/collect_vision_traces.py +368 -0
  141. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  142. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  143. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  144. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  145. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  146. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  147. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  148. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  149. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  150. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  151. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  152. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  153. examples/qwen_vl/run_vision_comparison.sh +61 -0
  154. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  155. examples/qwen_vl/test_image_validation.py +201 -0
  156. examples/qwen_vl/test_sft_vision_data.py +110 -0
  157. examples/rl/README.md +169 -0
  158. examples/rl/configs/eval_base_qwen.toml +17 -0
  159. examples/rl/configs/eval_rl_qwen.toml +13 -0
  160. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  161. examples/rl/configs/rl_from_base_qwen17.toml +80 -0
  162. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  163. examples/rl/download_dataset.py +80 -0
  164. examples/rl/run_eval.py +436 -0
  165. examples/rl/run_rl_and_save.py +111 -0
  166. examples/rl/task_app/README.md +21 -0
  167. examples/rl/task_app/math_single_step.py +990 -0
  168. examples/rl/task_app/math_task_app.py +111 -0
  169. examples/run_crafter_demo.sh +10 -0
  170. examples/sdk_prompt_learning_example.py +55 -0
  171. examples/sft/README.md +139 -0
  172. examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
  173. examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
  174. examples/sft/evaluate.py +117 -0
  175. examples/sft/export_dataset.py +120 -0
  176. examples/sft/generate_traces.py +164 -0
  177. examples/swe/__init__.py +12 -0
  178. examples/swe/task_app/README.md +135 -0
  179. examples/swe/task_app/__init__.py +2 -0
  180. examples/swe/task_app/grpo_swe_mini.py +604 -0
  181. examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
  182. examples/swe/task_app/hosted/README.md +173 -0
  183. examples/swe/task_app/hosted/__init__.py +5 -0
  184. examples/swe/task_app/hosted/branching.py +143 -0
  185. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  186. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  187. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  188. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  189. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  190. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  191. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  192. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  193. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  194. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  195. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
  196. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  197. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  198. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  199. examples/swe/task_app/hosted/hosted_app.py +204 -0
  200. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  201. examples/swe/task_app/hosted/inference/openai_client.py +584 -0
  202. examples/swe/task_app/hosted/main.py +100 -0
  203. examples/swe/task_app/hosted/policy_routes.py +1094 -0
  204. examples/swe/task_app/hosted/registry.py +195 -0
  205. examples/swe/task_app/hosted/rollout.py +1905 -0
  206. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  207. examples/swe/task_app/hosted/storage/volume.py +211 -0
  208. examples/swe/task_app/hosted/test_agents.py +161 -0
  209. examples/swe/task_app/hosted/test_service.py +136 -0
  210. examples/swe/task_app/hosted/utils.py +62 -0
  211. examples/swe/task_app/morph_backend.py +178 -0
  212. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  213. examples/task_apps/TESTING.md +275 -0
  214. examples/task_apps/banking77/__init__.py +6 -0
  215. examples/task_apps/banking77/banking77_task_app.py +912 -0
  216. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  217. examples/task_apps/banking77_pipeline/__init__.py +6 -0
  218. examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
  219. examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
  220. examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
  221. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  222. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
  223. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
  224. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  225. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  226. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  227. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  228. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  229. examples/task_apps/crafter/task_app/README.md +42 -0
  230. examples/task_apps/crafter/task_app/__init__.py +5 -0
  231. examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
  232. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
  233. examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
  234. examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
  235. examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
  236. examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  237. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  238. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  239. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  240. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
  241. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
  242. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
  243. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  244. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  245. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
  246. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  247. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
  248. examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
  249. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
  250. examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
  251. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
  252. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  253. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
  254. examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
  255. examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
  256. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
  257. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  258. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  259. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  260. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  261. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  262. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  263. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  264. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  265. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  266. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  267. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  268. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  269. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  270. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  271. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  272. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  273. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  274. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  275. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  276. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  277. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  278. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  279. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  280. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  281. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  282. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  283. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  284. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  285. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  286. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  287. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  288. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  289. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  290. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  291. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  292. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  293. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  294. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  295. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  296. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  297. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  298. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  299. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  300. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  301. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  302. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  303. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  304. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  305. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  306. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  307. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  308. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  309. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  310. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  311. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  312. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  313. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  314. examples/task_apps/enron/__init__.py +2 -0
  315. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  316. examples/task_apps/enron/filter_sft.toml +5 -0
  317. examples/task_apps/enron/task_app/README.md +14 -0
  318. examples/task_apps/enron/task_app/__init__.py +1 -0
  319. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  320. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  321. examples/task_apps/enron/tests/__init__.py +4 -0
  322. examples/task_apps/enron/tests/conftest.py +115 -0
  323. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  324. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  325. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  326. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  327. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  328. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  329. examples/task_apps/gepa_benchmarks/common.py +260 -0
  330. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  331. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  332. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  333. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  334. examples/task_apps/math/README.md +21 -0
  335. examples/task_apps/math/math_single_step.py +1000 -0
  336. examples/task_apps/math/math_task_app.py +115 -0
  337. examples/task_apps/pokemon_battle/__init__.py +2 -0
  338. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  339. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  340. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  341. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  342. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  343. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  344. examples/task_apps/pokemon_red/README.md +356 -0
  345. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
  346. examples/task_apps/pokemon_red/__init__.py +3 -0
  347. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
  348. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
  349. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  350. examples/task_apps/pokemon_red/task_app.py +1048 -0
  351. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  352. examples/task_apps/sokoban/README.md +306 -0
  353. examples/task_apps/sokoban/__init__.py +3 -0
  354. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  355. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  356. examples/task_apps/sokoban/filter_sft.toml +5 -0
  357. examples/task_apps/sokoban/task_app.py +1058 -0
  358. examples/task_apps/sokoban/tests/__init__.py +4 -0
  359. examples/task_apps/sokoban/tests/conftest.py +113 -0
  360. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  361. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  362. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  363. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  364. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  365. examples/task_apps/verilog/__init__.py +1 -0
  366. examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
  367. examples/task_apps/verilog/filter_sft.toml +5 -0
  368. examples/task_apps/verilog/task_app/README.md +12 -0
  369. examples/task_apps/verilog/task_app/__init__.py +1 -0
  370. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  371. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  372. examples/task_apps/verilog/tests/__init__.py +4 -0
  373. examples/task_apps/verilog/tests/conftest.py +115 -0
  374. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  375. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  376. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  377. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  378. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  379. examples/tunnel_gepa_banking77/README.md +106 -0
  380. examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
  381. examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
  382. examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
  383. examples/vlm/PROPOSAL.md +53 -0
  384. examples/vlm/README.md +68 -0
  385. examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
  386. examples/vlm/crafter_image_only_agent.py +207 -0
  387. examples/vlm/crafter_openai_vlm_agent.py +275 -0
  388. examples/vlm/filter_image_rows.py +63 -0
  389. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  390. examples/warming_up_to_rl/_utils.py +92 -0
  391. examples/warming_up_to_rl/analyze_trace_db.py +422 -0
  392. examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
  393. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
  394. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
  395. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
  396. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
  397. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
  398. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
  399. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
  400. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
  401. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
  402. examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
  403. examples/warming_up_to_rl/export_trace_sft.py +837 -0
  404. examples/warming_up_to_rl/groq_test.py +97 -0
  405. examples/warming_up_to_rl/manage_secrets.py +131 -0
  406. examples/warming_up_to_rl/old/event_rewards.md +234 -0
  407. examples/warming_up_to_rl/old/notes.md +73 -0
  408. examples/warming_up_to_rl/readme.md +110 -0
  409. examples/warming_up_to_rl/run_eval.py +736 -0
  410. examples/warming_up_to_rl/run_fft_and_save.py +380 -0
  411. examples/warming_up_to_rl/run_local_rollout.py +239 -0
  412. examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
  413. examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
  414. examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
  415. examples/warming_up_to_rl/run_rl_and_save.py +124 -0
  416. examples/warming_up_to_rl/run_rollout_remote.py +156 -0
  417. examples/warming_up_to_rl/task_app/README.md +42 -0
  418. examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
  419. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  420. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  421. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  422. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  423. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  424. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  425. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  426. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  427. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  428. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  429. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  430. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  431. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  432. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
  433. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  434. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
  435. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  436. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
  437. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  438. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
  439. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  440. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  441. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  442. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  443. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
  444. examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
  445. examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
  446. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
  447. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
  448. examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
  449. examples/workflows/math_rl/download_dataset.py +80 -0
  450. examples/workflows/math_rl/run_eval.py +436 -0
  451. examples/workflows/math_rl/run_rl_and_save.py +111 -0
  452. synth_ai/__init__.py +47 -23
  453. synth_ai/_utils/__init__.py +47 -0
  454. synth_ai/_utils/base_url.py +10 -0
  455. synth_ai/_utils/http.py +10 -0
  456. synth_ai/_utils/prompts.py +10 -0
  457. synth_ai/_utils/task_app_state.py +12 -0
  458. synth_ai/_utils/user_config.py +10 -0
  459. synth_ai/api/models/supported.py +514 -0
  460. synth_ai/api/train/__init__.py +63 -0
  461. synth_ai/api/train/builders.py +473 -0
  462. synth_ai/api/train/cli.py +1185 -0
  463. synth_ai/api/train/config_finder.py +246 -0
  464. synth_ai/api/train/configs/__init__.py +65 -0
  465. synth_ai/api/train/configs/prompt_learning.py +496 -0
  466. synth_ai/api/train/configs/rl.py +188 -0
  467. synth_ai/api/train/configs/sft.py +99 -0
  468. synth_ai/api/train/configs/shared.py +81 -0
  469. synth_ai/api/train/env_resolver.py +352 -0
  470. synth_ai/api/train/pollers.py +91 -0
  471. synth_ai/api/train/prompt_learning.py +425 -0
  472. synth_ai/api/train/sft.py +390 -0
  473. synth_ai/api/train/supported_algos.py +147 -0
  474. synth_ai/api/train/task_app.py +195 -0
  475. synth_ai/api/train/utils.py +244 -0
  476. synth_ai/api/train/validators.py +1117 -0
  477. synth_ai/api/tunnel.py +49 -0
  478. synth_ai/auth/credentials.py +94 -0
  479. synth_ai/baseline/__init__.py +25 -0
  480. synth_ai/baseline/config.py +209 -0
  481. synth_ai/baseline/discovery.py +214 -0
  482. synth_ai/baseline/execution.py +146 -0
  483. synth_ai/cfgs.py +227 -0
  484. synth_ai/cli/__init__.py +90 -45
  485. synth_ai/cli/_modal_wrapper.py +31 -0
  486. synth_ai/cli/_storage.py +20 -0
  487. synth_ai/cli/_typer_patch.py +47 -0
  488. synth_ai/cli/_validate_task_app.py +29 -0
  489. synth_ai/cli/balance.py +16 -4
  490. synth_ai/cli/calc.py +36 -21
  491. synth_ai/cli/claude.py +70 -0
  492. synth_ai/cli/codex.py +267 -0
  493. synth_ai/cli/commands/__init__.py +18 -0
  494. synth_ai/cli/commands/baseline/__init__.py +12 -0
  495. synth_ai/cli/commands/baseline/core.py +637 -0
  496. synth_ai/cli/commands/baseline/list.py +93 -0
  497. synth_ai/cli/commands/demo/__init__.py +6 -0
  498. synth_ai/cli/commands/demo/core.py +163 -0
  499. synth_ai/cli/commands/eval/__init__.py +19 -0
  500. synth_ai/cli/commands/eval/core.py +1112 -0
  501. synth_ai/cli/commands/eval/errors.py +81 -0
  502. synth_ai/cli/commands/eval/validation.py +133 -0
  503. synth_ai/cli/commands/filter/__init__.py +12 -0
  504. synth_ai/cli/commands/filter/core.py +424 -0
  505. synth_ai/cli/commands/filter/errors.py +55 -0
  506. synth_ai/cli/commands/filter/validation.py +77 -0
  507. synth_ai/cli/commands/help/__init__.py +185 -0
  508. synth_ai/cli/commands/help/core.py +72 -0
  509. synth_ai/cli/commands/smoke/__init__.py +7 -0
  510. synth_ai/cli/commands/smoke/core.py +1437 -0
  511. synth_ai/cli/commands/status/__init__.py +66 -0
  512. synth_ai/cli/commands/status/client.py +192 -0
  513. synth_ai/cli/commands/status/config.py +92 -0
  514. synth_ai/cli/commands/status/errors.py +20 -0
  515. synth_ai/cli/commands/status/formatters.py +164 -0
  516. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  517. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  518. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  519. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  520. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  521. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  522. synth_ai/cli/commands/status/subcommands/session.py +183 -0
  523. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  524. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  525. synth_ai/cli/commands/status/utils.py +114 -0
  526. synth_ai/cli/commands/train/__init__.py +53 -0
  527. synth_ai/cli/commands/train/core.py +21 -0
  528. synth_ai/cli/commands/train/errors.py +117 -0
  529. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  530. synth_ai/cli/commands/train/judge_validation.py +305 -0
  531. synth_ai/cli/commands/train/validation.py +386 -0
  532. synth_ai/cli/demo.py +32 -140
  533. synth_ai/cli/deploy.py +233 -0
  534. synth_ai/cli/eval/__init__.py +36 -0
  535. synth_ai/cli/eval/core.py +5 -0
  536. synth_ai/cli/eval/errors.py +31 -0
  537. synth_ai/cli/eval/validation.py +5 -0
  538. synth_ai/cli/filter/__init__.py +28 -0
  539. synth_ai/cli/filter/core.py +5 -0
  540. synth_ai/cli/filter/errors.py +23 -0
  541. synth_ai/cli/filter/validation.py +5 -0
  542. synth_ai/cli/legacy_root_backup.py +28 -22
  543. synth_ai/cli/lib/__init__.py +10 -0
  544. synth_ai/cli/lib/task_app_discovery.py +7 -0
  545. synth_ai/cli/lib/task_app_env.py +518 -0
  546. synth_ai/cli/mcp.py +34 -0
  547. synth_ai/cli/modal_serve/__init__.py +12 -0
  548. synth_ai/cli/modal_serve/core.py +14 -0
  549. synth_ai/cli/modal_serve/errors.py +8 -0
  550. synth_ai/cli/modal_serve/validation.py +11 -0
  551. synth_ai/cli/opencode.py +256 -0
  552. synth_ai/cli/recent.py +13 -7
  553. synth_ai/cli/rl_demo.py +166 -114
  554. synth_ai/cli/root.py +143 -112
  555. synth_ai/cli/serve/__init__.py +12 -0
  556. synth_ai/cli/serve/core.py +14 -0
  557. synth_ai/cli/serve/errors.py +8 -0
  558. synth_ai/cli/serve/validation.py +11 -0
  559. synth_ai/cli/setup.py +49 -0
  560. synth_ai/cli/status.py +7 -125
  561. synth_ai/cli/task_app_deploy.py +7 -0
  562. synth_ai/cli/task_app_list.py +25 -0
  563. synth_ai/cli/task_app_modal_serve.py +11 -0
  564. synth_ai/cli/task_app_serve.py +11 -0
  565. synth_ai/cli/task_apps.py +3134 -0
  566. synth_ai/cli/traces.py +9 -5
  567. synth_ai/cli/train/__init__.py +12 -0
  568. synth_ai/cli/train/core.py +21 -0
  569. synth_ai/cli/train/errors.py +8 -0
  570. synth_ai/cli/train/validation.py +24 -0
  571. synth_ai/cli/train.py +5 -0
  572. synth_ai/cli/turso.py +73 -0
  573. synth_ai/cli/watch.py +13 -18
  574. synth_ai/demos/__init__.py +10 -0
  575. synth_ai/demos/core/__init__.py +28 -1
  576. synth_ai/demos/core/cli.py +745 -416
  577. synth_ai/demos/crafter/__init__.py +1 -0
  578. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  579. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  580. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  581. synth_ai/demos/demo_registry.py +176 -0
  582. synth_ai/demos/demo_task_apps/__init__.py +7 -1
  583. synth_ai/demos/demo_task_apps/core.py +75 -37
  584. synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
  585. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
  586. synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
  587. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
  588. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  589. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  590. synth_ai/demos/demo_task_apps/math/config.toml +55 -110
  591. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  592. synth_ai/demos/demo_task_apps/math/modal_task_app.py +491 -166
  593. synth_ai/demos/demo_task_apps/math/task_app_entry.py +37 -0
  594. synth_ai/demos/math/__init__.py +1 -0
  595. synth_ai/demos/math/_common.py +16 -0
  596. synth_ai/demos/math/app.py +38 -0
  597. synth_ai/demos/math/config.toml +76 -0
  598. synth_ai/demos/math/deploy_modal.py +54 -0
  599. synth_ai/demos/math/modal_task_app.py +703 -0
  600. synth_ai/demos/math/task_app_entry.py +51 -0
  601. synth_ai/environments/environment/core.py +7 -1
  602. synth_ai/environments/examples/bandit/engine.py +12 -5
  603. synth_ai/environments/examples/bandit/environment.py +0 -1
  604. synth_ai/environments/examples/bandit/taskset.py +4 -4
  605. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  606. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  607. synth_ai/environments/examples/crafter_classic/environment.py +93 -2
  608. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  609. synth_ai/environments/examples/enron/engine.py +7 -2
  610. synth_ai/environments/examples/enron/environment.py +68 -0
  611. synth_ai/environments/examples/red/engine.py +60 -12
  612. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  613. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  614. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  615. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  616. synth_ai/environments/examples/red/environment.py +86 -0
  617. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  618. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  619. synth_ai/environments/examples/verilog/engine.py +104 -12
  620. synth_ai/environments/examples/wordle/environment.py +0 -1
  621. synth_ai/environments/reproducibility/tree.py +5 -6
  622. synth_ai/environments/service/app.py +11 -12
  623. synth_ai/environments/service/core_routes.py +10 -9
  624. synth_ai/environments/stateful/engine.py +1 -1
  625. synth_ai/environments/tasks/core.py +1 -0
  626. synth_ai/environments/tasks/filters.py +5 -6
  627. synth_ai/environments/tasks/utils.py +4 -5
  628. synth_ai/evals/__init__.py +15 -0
  629. synth_ai/evals/base.py +14 -5
  630. synth_ai/evals/client.py +82 -0
  631. synth_ai/evals/types.py +42 -0
  632. synth_ai/http.py +8 -22
  633. synth_ai/http_client.py +45 -12
  634. synth_ai/inference/__init__.py +0 -2
  635. synth_ai/inference/client.py +21 -7
  636. synth_ai/jobs/client.py +129 -80
  637. synth_ai/judge_schemas.py +127 -0
  638. synth_ai/learning/__init__.py +51 -6
  639. synth_ai/learning/algorithms.py +14 -0
  640. synth_ai/learning/client.py +122 -30
  641. synth_ai/learning/config.py +2 -40
  642. synth_ai/learning/constants.py +0 -2
  643. synth_ai/learning/ft_client.py +4 -56
  644. synth_ai/learning/health.py +14 -8
  645. synth_ai/learning/jobs.py +43 -47
  646. synth_ai/learning/prompt_learning_client.py +276 -0
  647. synth_ai/learning/prompt_learning_types.py +185 -0
  648. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  649. synth_ai/learning/rl/client.py +269 -0
  650. synth_ai/learning/rl/config.py +31 -0
  651. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  652. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  653. synth_ai/learning/rl/secrets.py +13 -0
  654. synth_ai/learning/rl_client.py +2 -253
  655. synth_ai/learning/sft/__init__.py +29 -0
  656. synth_ai/learning/sft/client.py +68 -0
  657. synth_ai/learning/sft/config.py +270 -0
  658. synth_ai/learning/sft/data.py +698 -0
  659. synth_ai/learning/sse.py +25 -26
  660. synth_ai/learning/validators.py +29 -25
  661. synth_ai/mcp/__init__.py +5 -0
  662. synth_ai/mcp/__main__.py +8 -0
  663. synth_ai/mcp/main.py +254 -0
  664. synth_ai/mcp/setup.py +100 -0
  665. synth_ai/modal.py +257 -0
  666. synth_ai/pricing/__init__.py +3 -0
  667. synth_ai/pricing/model_pricing.py +64 -0
  668. synth_ai/session/__init__.py +75 -0
  669. synth_ai/session/client.py +383 -0
  670. synth_ai/session/constants.py +63 -0
  671. synth_ai/session/exceptions.py +105 -0
  672. synth_ai/session/manager.py +139 -0
  673. synth_ai/session/models.py +89 -0
  674. synth_ai/session/query.py +110 -0
  675. synth_ai/spec/__init__.py +46 -0
  676. synth_ai/spec/dataclasses.py +149 -0
  677. synth_ai/spec/loader.py +144 -0
  678. synth_ai/spec/serializer.py +199 -0
  679. synth_ai/spec/validation.py +250 -0
  680. synth_ai/streaming/__init__.py +29 -0
  681. synth_ai/streaming/config.py +94 -0
  682. synth_ai/streaming/handlers.py +589 -0
  683. synth_ai/streaming/streamer.py +320 -0
  684. synth_ai/streaming/types.py +95 -0
  685. synth_ai/task/__init__.py +116 -3
  686. synth_ai/task/apps/__init__.py +132 -0
  687. synth_ai/task/auth.py +165 -0
  688. synth_ai/task/client.py +167 -0
  689. synth_ai/task/config.py +261 -0
  690. synth_ai/task/contracts.py +173 -57
  691. synth_ai/task/datasets.py +108 -0
  692. synth_ai/task/errors.py +50 -0
  693. synth_ai/task/health.py +17 -11
  694. synth_ai/task/inference_api.py +101 -0
  695. synth_ai/task/json.py +111 -0
  696. synth_ai/task/proxy.py +251 -0
  697. synth_ai/task/rubrics/__init__.py +55 -0
  698. synth_ai/task/rubrics/loaders.py +156 -0
  699. synth_ai/task/rubrics/models.py +57 -0
  700. synth_ai/task/rubrics/scoring.py +116 -0
  701. synth_ai/task/rubrics/strict.py +149 -0
  702. synth_ai/task/rubrics.py +219 -0
  703. synth_ai/task/server.py +432 -0
  704. synth_ai/task/trace_correlation_helpers.py +328 -0
  705. synth_ai/task/tracing_utils.py +95 -0
  706. synth_ai/task/validators.py +449 -6
  707. synth_ai/task/vendors.py +59 -0
  708. synth_ai/tracing_v3/__init__.py +4 -0
  709. synth_ai/tracing_v3/abstractions.py +21 -4
  710. synth_ai/tracing_v3/config.py +167 -22
  711. synth_ai/tracing_v3/constants.py +21 -0
  712. synth_ai/tracing_v3/db_config.py +42 -29
  713. synth_ai/tracing_v3/decorators.py +80 -45
  714. synth_ai/tracing_v3/examples/basic_usage.py +15 -9
  715. synth_ai/tracing_v3/hooks.py +6 -4
  716. synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
  717. synth_ai/tracing_v3/migration_helper.py +1 -2
  718. synth_ai/tracing_v3/replica_sync.py +12 -7
  719. synth_ai/tracing_v3/serialization.py +130 -0
  720. synth_ai/tracing_v3/session_tracer.py +86 -21
  721. synth_ai/tracing_v3/storage/base.py +98 -12
  722. synth_ai/tracing_v3/storage/config.py +63 -16
  723. synth_ai/tracing_v3/storage/factory.py +11 -9
  724. synth_ai/tracing_v3/storage/utils.py +15 -11
  725. synth_ai/tracing_v3/trace_utils.py +317 -0
  726. synth_ai/tracing_v3/turso/__init__.py +8 -21
  727. synth_ai/tracing_v3/turso/daemon.py +123 -15
  728. synth_ai/tracing_v3/turso/models.py +5 -2
  729. synth_ai/tracing_v3/turso/native_manager.py +1293 -0
  730. synth_ai/tracing_v3/utils.py +5 -4
  731. synth_ai/tunnel.py +143 -0
  732. synth_ai/tunnel_deploy.py +278 -0
  733. synth_ai/types.py +8 -0
  734. synth_ai/urls.py +11 -0
  735. synth_ai/utils/__init__.py +166 -0
  736. synth_ai/utils/agents.py +74 -0
  737. synth_ai/utils/apps.py +152 -0
  738. synth_ai/utils/base_url.py +94 -0
  739. synth_ai/utils/bin.py +39 -0
  740. synth_ai/utils/claude.py +36 -0
  741. synth_ai/utils/cli.py +284 -0
  742. synth_ai/utils/config.py +81 -0
  743. synth_ai/utils/env.py +346 -0
  744. synth_ai/utils/errors.py +85 -0
  745. synth_ai/utils/http.py +172 -0
  746. synth_ai/utils/json.py +72 -0
  747. synth_ai/utils/log_filter.py +99 -0
  748. synth_ai/utils/logging.py +198 -0
  749. synth_ai/utils/modal.py +299 -0
  750. synth_ai/utils/paths.py +95 -0
  751. synth_ai/utils/process.py +233 -0
  752. synth_ai/utils/prompts.py +39 -0
  753. synth_ai/utils/sqld.py +122 -0
  754. synth_ai/utils/ssl.py +25 -0
  755. synth_ai/utils/task_app_discovery.py +882 -0
  756. synth_ai/utils/task_app_env.py +186 -0
  757. synth_ai/utils/task_app_state.py +318 -0
  758. synth_ai/utils/tunnel/__init__.py +12 -0
  759. synth_ai/utils/tunnel/config.py +55 -0
  760. synth_ai/utils/user_config.py +137 -0
  761. synth_ai/uvicorn.py +77 -0
  762. synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
  763. synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
  764. {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
  765. {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
  766. synth_ai/cli/man.py +0 -106
  767. synth_ai/core/experiment.py +0 -15
  768. synth_ai/core/system.py +0 -15
  769. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  770. synth_ai/experimental/synth_oss.py +0 -446
  771. synth_ai/handshake.py +0 -63
  772. synth_ai/install_sqld.sh +0 -40
  773. synth_ai/learning/offline/dpo.py +0 -0
  774. synth_ai/learning/offline/providers.py +0 -7
  775. synth_ai/learning/offline/sft.py +0 -0
  776. synth_ai/learning/offline/shared.py +0 -0
  777. synth_ai/learning/online/grpo.py +0 -0
  778. synth_ai/learning/online/irft.py +0 -0
  779. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  780. synth_ai/learning/prompts/gepa.py +0 -0
  781. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  782. synth_ai/learning/prompts/mipro.py +0 -289
  783. synth_ai/learning/prompts/random_search.py +0 -246
  784. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  785. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  786. synth_ai/lm/__init__.py +0 -51
  787. synth_ai/lm/caching/constants.py +0 -6
  788. synth_ai/lm/caching/dbs.py +0 -0
  789. synth_ai/lm/caching/ephemeral.py +0 -102
  790. synth_ai/lm/caching/handler.py +0 -137
  791. synth_ai/lm/caching/initialize.py +0 -11
  792. synth_ai/lm/caching/persistent.py +0 -114
  793. synth_ai/lm/config.py +0 -110
  794. synth_ai/lm/constants.py +0 -32
  795. synth_ai/lm/core/__init__.py +0 -8
  796. synth_ai/lm/core/all.py +0 -73
  797. synth_ai/lm/core/exceptions.py +0 -7
  798. synth_ai/lm/core/main.py +0 -319
  799. synth_ai/lm/core/main_v3.py +0 -594
  800. synth_ai/lm/core/synth_models.py +0 -48
  801. synth_ai/lm/core/vendor_clients.py +0 -188
  802. synth_ai/lm/cost/monitor.py +0 -1
  803. synth_ai/lm/cost/statefulness.py +0 -1
  804. synth_ai/lm/injection.py +0 -80
  805. synth_ai/lm/overrides.py +0 -206
  806. synth_ai/lm/provider_support/__init__.py +0 -8
  807. synth_ai/lm/provider_support/anthropic.py +0 -972
  808. synth_ai/lm/provider_support/openai.py +0 -1139
  809. synth_ai/lm/provider_support/suppress_logging.py +0 -31
  810. synth_ai/lm/structured_outputs/handler.py +0 -440
  811. synth_ai/lm/structured_outputs/inject.py +0 -297
  812. synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
  813. synth_ai/lm/tools/__init__.py +0 -3
  814. synth_ai/lm/tools/base.py +0 -172
  815. synth_ai/lm/unified_interface.py +0 -202
  816. synth_ai/lm/vendors/base.py +0 -81
  817. synth_ai/lm/vendors/core/anthropic_api.py +0 -387
  818. synth_ai/lm/vendors/core/gemini_api.py +0 -292
  819. synth_ai/lm/vendors/core/mistral_api.py +0 -322
  820. synth_ai/lm/vendors/core/openai_api.py +0 -225
  821. synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
  822. synth_ai/lm/vendors/local/ollama.py +0 -0
  823. synth_ai/lm/vendors/openai_standard.py +0 -780
  824. synth_ai/lm/vendors/openai_standard_responses.py +0 -256
  825. synth_ai/lm/vendors/retries.py +0 -22
  826. synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
  827. synth_ai/lm/vendors/supported/deepseek.py +0 -69
  828. synth_ai/lm/vendors/supported/grok.py +0 -75
  829. synth_ai/lm/vendors/supported/groq.py +0 -16
  830. synth_ai/lm/vendors/supported/ollama.py +0 -15
  831. synth_ai/lm/vendors/supported/openrouter.py +0 -74
  832. synth_ai/lm/vendors/supported/together.py +0 -11
  833. synth_ai/lm/vendors/synth_client.py +0 -808
  834. synth_ai/lm/warmup.py +0 -186
  835. synth_ai/rl/secrets.py +0 -19
  836. synth_ai/scripts/verify_rewards.py +0 -100
  837. synth_ai/tracing/__init__.py +0 -30
  838. synth_ai/tracing_v1/__init__.py +0 -33
  839. synth_ai/tracing_v3/turso/manager.py +0 -760
  840. synth_ai/v0/tracing/abstractions.py +0 -224
  841. synth_ai/v0/tracing/base_client.py +0 -91
  842. synth_ai/v0/tracing/client_manager.py +0 -131
  843. synth_ai/v0/tracing/config.py +0 -142
  844. synth_ai/v0/tracing/context.py +0 -146
  845. synth_ai/v0/tracing/decorators.py +0 -682
  846. synth_ai/v0/tracing/events/__init__.py +0 -0
  847. synth_ai/v0/tracing/events/manage.py +0 -147
  848. synth_ai/v0/tracing/events/scope.py +0 -86
  849. synth_ai/v0/tracing/events/store.py +0 -228
  850. synth_ai/v0/tracing/immediate_client.py +0 -151
  851. synth_ai/v0/tracing/local.py +0 -18
  852. synth_ai/v0/tracing/log_client_base.py +0 -73
  853. synth_ai/v0/tracing/retry_queue.py +0 -186
  854. synth_ai/v0/tracing/trackers.py +0 -515
  855. synth_ai/v0/tracing/upload.py +0 -512
  856. synth_ai/v0/tracing/utils.py +0 -9
  857. synth_ai/v0/tracing_v1/__init__.py +0 -16
  858. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  859. synth_ai/v0/tracing_v1/base_client.py +0 -91
  860. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  861. synth_ai/v0/tracing_v1/config.py +0 -142
  862. synth_ai/v0/tracing_v1/context.py +0 -146
  863. synth_ai/v0/tracing_v1/decorators.py +0 -703
  864. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  865. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  866. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  867. synth_ai/v0/tracing_v1/events/store.py +0 -228
  868. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  869. synth_ai/v0/tracing_v1/local.py +0 -18
  870. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  871. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  872. synth_ai/v0/tracing_v1/trackers.py +0 -515
  873. synth_ai/v0/tracing_v1/upload.py +0 -527
  874. synth_ai/v0/tracing_v1/utils.py +0 -9
  875. synth_ai/zyk/__init__.py +0 -30
  876. synth_ai-0.2.8.dev4.dist-info/METADATA +0 -129
  877. synth_ai-0.2.8.dev4.dist-info/RECORD +0 -420
  878. {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
  879. {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
  880. {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
  881. {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
  882. {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
  883. {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
  884. {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
  885. {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
  886. /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
  887. /synth_ai/{learning/filtering.py → py.typed} +0 -0
  888. {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
  889. {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,999 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import contextlib
5
+ import json
6
+ import logging
7
+ import os
8
+ import time
9
+ from typing import Any
10
+ from urllib.parse import urlparse, urlunparse
11
+
12
+ import click
13
+ import httpx
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
+ class OpenAIClient:
19
+ """Async HTTP client for OpenAI-compatible inference servers (vLLM)."""
20
+
21
+ def __init__(
22
+ self,
23
+ base_url: str,
24
+ api_key: str | None = None,
25
+ timeout_s: float = 120.0,
26
+ ) -> None:
27
+ self.base_url = base_url.rstrip("/")
28
+ self.api_key = api_key
29
+ self.timeout_s = timeout_s
30
+ self.headers = {}
31
+ # If we're calling back into our own task app proxy (e.g., /proxy/groq),
32
+ # the FastAPI app still enforces X-API-Key. Include it when available so
33
+ # intra-app proxy calls authenticate correctly.
34
+ try:
35
+ env_key = os.getenv("ENVIRONMENT_API_KEY")
36
+ if env_key and isinstance(env_key, str):
37
+ self.headers.setdefault("X-API-Key", env_key)
38
+ except Exception:
39
+ pass
40
+
41
+ def _fix_model_parameters(
42
+ self, request: dict[str, Any], target_url: str | None = None
43
+ ) -> dict[str, Any]:
44
+ """
45
+ Fix parameter compatibility for newer OpenAI models.
46
+
47
+ Newer models like gpt-5-nano use 'max_completion_tokens' instead of 'max_tokens'.
48
+ """
49
+ if not request:
50
+ return request
51
+
52
+ # Make a copy to avoid modifying the original
53
+ fixed_request = request.copy()
54
+
55
+ # Determine if target is OpenAI-compatible (OpenAI, Azure OpenAI).
56
+ # Groq shares the API surface but we keep tool enforcement fields intact.
57
+ is_openai = False
58
+ is_groq = False
59
+ try:
60
+ if isinstance(target_url, str):
61
+ low = target_url.lower()
62
+ if "groq.com" in low or "/proxy/groq" in low:
63
+ is_groq = True
64
+ elif ("openai.com" in low) or ("azure" in low and ".openai." in low) or (
65
+ "/proxy/openai" in low
66
+ ):
67
+ is_openai = True
68
+ except Exception:
69
+ is_openai = False
70
+
71
+ model = fixed_request.get("model", "")
72
+
73
+ if is_openai:
74
+ # Remove fields OpenAI/Groq don't accept
75
+ for k in (
76
+ "stop_after_tool_calls",
77
+ "thinking_mode",
78
+ "thinking_budget",
79
+ "reasoning",
80
+ "extra_body",
81
+ "parallel_tool_calls",
82
+ "function_call",
83
+ ):
84
+ if k in fixed_request:
85
+ fixed_request.pop(k, None)
86
+
87
+ # GPT-5 family specifics
88
+ if "gpt-5" in model or "gpt-4.1" in model:
89
+ # Convert max_tokens to max_completion_tokens for newer models
90
+ if "max_tokens" in fixed_request:
91
+ if "max_completion_tokens" not in fixed_request:
92
+ fixed_request["max_completion_tokens"] = fixed_request.pop("max_tokens")
93
+ logger.info(
94
+ f"Converted max_tokens to max_completion_tokens for model {model}"
95
+ )
96
+ else:
97
+ fixed_request.pop("max_tokens")
98
+ logger.info(f"Removed conflicting max_tokens parameter for model {model}")
99
+ # Some OpenAI endpoints ignore/deny sampling fields for reasoning models
100
+ for k in ("temperature", "top_p"):
101
+ if k in fixed_request:
102
+ fixed_request.pop(k, None)
103
+ # If tools are present, force single tool choice to our function
104
+ try:
105
+ tools = fixed_request.get("tools")
106
+ if isinstance(tools, list) and tools:
107
+ # Choose the first provided function name from tools schema (e.g., run_command)
108
+ func_name = None
109
+ for t in tools:
110
+ try:
111
+ cand = None
112
+ if isinstance(t, dict):
113
+ f = t.get("function")
114
+ if isinstance(f, dict):
115
+ cand = f.get("name")
116
+ if isinstance(cand, str) and cand:
117
+ func_name = cand
118
+ break
119
+ except Exception:
120
+ continue
121
+ if not func_name:
122
+ func_name = "run_command"
123
+ fixed_request["tool_choice"] = {
124
+ "type": "function",
125
+ "function": {"name": func_name},
126
+ }
127
+ fixed_request["parallel_tool_calls"] = False
128
+ except Exception:
129
+ pass
130
+
131
+ return fixed_request
132
+
133
+ async def generate(
134
+ self,
135
+ request: dict[str, Any],
136
+ base_url: str | None = None,
137
+ timeout_s: float | None = None,
138
+ extra_headers: dict[str, str] | None = None,
139
+ ) -> dict[str, Any]:
140
+ """
141
+ Send a chat completion request to the inference server.
142
+
143
+ Args:
144
+ request: OpenAI-compatible chat completion request
145
+ base_url: Override base URL for this request
146
+ timeout_s: Override timeout for this request
147
+ extra_headers: Additional headers to include (e.g., X-Policy-Name)
148
+
149
+ Returns:
150
+ OpenAI-compatible chat completion response
151
+ """
152
+ base = (base_url or self.base_url).rstrip("/")
153
+ # Ensure processed_request is defined for error logging paths
154
+ processed_request: dict[str, Any] = dict(request or {})
155
+
156
+ # Bulletproof normalization BEFORE any parsing
157
+ def _local_force_normalize(u: str) -> str:
158
+ if not isinstance(u, str) or not u:
159
+ return u
160
+ p = urlparse(u)
161
+ path = (p.path or "").rstrip("/")
162
+ q = p.query or ""
163
+ # If query contains a path segment, extract and repair
164
+ if q and "/" in q:
165
+ before, after = q.split("/", 1)
166
+ # Split off any extra query parameters that were appended after the path
167
+ cut_positions = [i for i in [after.find("&"), after.find("?")] if i >= 0]
168
+ cut = min(cut_positions) if cut_positions else len(after)
169
+ path_from_query = "/" + after[:cut]
170
+ extra_query = after[cut + 1 :] if cut < len(after) else ""
171
+ merged_query = before
172
+ if extra_query:
173
+ merged_query = f"{merged_query}&{extra_query}" if merged_query else extra_query
174
+ # Ensure final path
175
+ final_path = path_from_query if path_from_query.startswith("/v1/chat/completions") else f"{path_from_query.rstrip('/')}/v1/chat/completions"
176
+ p = p._replace(path=final_path, query=merged_query)
177
+ u = urlunparse(p)
178
+ p = urlparse(u)
179
+ path = p.path or ""
180
+ q = p.query or ""
181
+ if not path.endswith("/v1/chat/completions"):
182
+ new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
183
+ p = p._replace(path=new_path)
184
+ u = urlunparse(p)
185
+ p = urlparse(u)
186
+ q = p.query or ""
187
+ if q and "/" in q:
188
+ # Last-resort: drop anything after first '/'
189
+ safe_q = q.split("/")[0]
190
+ p = p._replace(query=safe_q)
191
+ u = urlunparse(p)
192
+ return u
193
+
194
+ norm_base = None
195
+ try:
196
+ # Try importing shared normalizer first
197
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.utils import (
198
+ force_normalize_chat_completions_url,
199
+ )
200
+ norm_base = force_normalize_chat_completions_url(base)
201
+ except Exception:
202
+ norm_base = _local_force_normalize(base)
203
+ base = norm_base or base
204
+ # Parse URL to handle query parameters correctly
205
+ parsed = urlparse(base)
206
+ path = parsed.path.rstrip("/")
207
+ query = parsed.query
208
+
209
+ # Debug: Log URL parsing
210
+ logger.error(f"[URL_PARSE] base={base} parsed.path={parsed.path} parsed.query={parsed.query}")
211
+
212
+ # CRITICAL FIX: Handle malformed URLs where path is incorrectly in the query string
213
+ # Example: https://host?cid=trace_123/v1/chat/completions
214
+ # Should be: https://host/v1/chat/completions?cid=trace_123
215
+
216
+ # ALWAYS check for malformed URLs - this is CRITICAL
217
+ # CRASH IMMEDIATELY if URL is malformed - don't let it through!
218
+ if query and "/" in query:
219
+ logger.error(f"[URL_FATAL] MALFORMED URL DETECTED AT START: base={base} query={query}")
220
+ # Try to fix it
221
+ logger.error(f"[URL_FIX_TRIGGERED] Query contains '/': query={query}")
222
+ # This is a malformed URL - extract path from query and fix it
223
+ logger.error(
224
+ f"[URL_FIX] Malformed URL detected: {base}\n"
225
+ f"Query contains path segments. Fixing..."
226
+ )
227
+
228
+ # Find where the path starts in the query string
229
+ # The query format is: "cid=value/path" or similar
230
+ # We need to find the first "/" that starts a path segment
231
+ query_parts = query.split("/", 1)
232
+ if len(query_parts) == 2:
233
+ # query_parts[0] is the actual query (e.g., "cid=trace_123")
234
+ # query_parts[1] is the path that was incorrectly put in query
235
+ actual_query = query_parts[0]
236
+ path_and_more = query_parts[1] # Could be "v1/chat/completions" or "v1/chat/completions&foo=bar"
237
+
238
+ # Extract the path part (everything before "&" or "?" if present)
239
+ # Handle both "&" (query param separator) and "?" (another malformed query separator)
240
+ if "&" in path_and_more:
241
+ # Path is followed by more query params (separated by &)
242
+ path_segment, extra_query = path_and_more.split("&", 1)
243
+ path_in_query = "/" + path_segment # Restore leading slash
244
+ # Merge extra query params with actual_query
245
+ actual_query = f"{actual_query}&{extra_query}"
246
+ elif "?" in path_and_more:
247
+ # Path is followed by more query params (separated by ?, which is malformed)
248
+ path_segment, extra_query = path_and_more.split("?", 1)
249
+ path_in_query = "/" + path_segment # Restore leading slash
250
+ # Merge extra query params with actual_query (use & as separator)
251
+ actual_query = f"{actual_query}&{extra_query}"
252
+ else:
253
+ # No extra query params, just the path
254
+ path_in_query = "/" + path_and_more # Restore leading slash
255
+
256
+ # If the path_in_query already contains /v1/chat/completions, use it
257
+ # Otherwise, append /v1/chat/completions
258
+ if path_in_query.startswith("/v1/chat/completions"):
259
+ final_path = path_in_query
260
+ else:
261
+ # Append /v1/chat/completions to whatever path we found
262
+ final_path = path_in_query.rstrip("/") + "/v1/chat/completions"
263
+
264
+ # Reconstruct URL correctly: path comes before query
265
+ parsed = parsed._replace(path=final_path, query=actual_query)
266
+ url = urlunparse(parsed)
267
+ logger.warning(f"[URL_FIX] Fixed malformed URL:\n FROM: {base}\n TO: {url}")
268
+ else:
269
+ # Can't parse, fall through to normal processing
270
+ logger.error(f"[URL_FIX] Could not parse malformed query: {query}")
271
+ path = parsed.path.rstrip("/")
272
+ if not path.endswith("/v1/chat/completions"):
273
+ new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
274
+ parsed = parsed._replace(path=new_path)
275
+ url = urlunparse(parsed)
276
+ else:
277
+ url = base
278
+ # Normal case: query params are separate from path
279
+ elif path.endswith("/v1/chat/completions"):
280
+ url = base
281
+ else:
282
+ # Append /v1/chat/completions to the path, preserving query params
283
+ new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
284
+ parsed = parsed._replace(path=new_path)
285
+ url = urlunparse(parsed)
286
+ logger.debug(f"[URL_CONSTRUCT] Added path to URL: {base} -> {url}")
287
+
288
+ # FINAL VALIDATION: Ensure the constructed URL is correct
289
+ final_parsed = urlparse(url)
290
+ final_path = final_parsed.path or ""
291
+ final_query = final_parsed.query or ""
292
+
293
+ # Verify path is correct
294
+ if not final_path.endswith("/v1/chat/completions"):
295
+ error_msg = (
296
+ f"FATAL [OpenAIClient]: URL missing /v1/chat/completions path!\n"
297
+ f"Original: {base}\n"
298
+ f"Constructed: {url}\n"
299
+ f"Path: {final_path}\n"
300
+ )
301
+ logger.error(error_msg)
302
+ raise ValueError(error_msg)
303
+
304
+ # Verify query doesn't contain path segments
305
+ if final_query and "/" in final_query:
306
+ error_msg = (
307
+ f"FATAL [OpenAIClient]: Query still contains path segments after fix!\n"
308
+ f"Original: {base}\n"
309
+ f"Constructed: {url}\n"
310
+ f"Query: {final_query}\n"
311
+ f"This indicates a bug in URL construction logic."
312
+ )
313
+ logger.error(error_msg)
314
+ raise ValueError(error_msg)
315
+
316
+ timeout = timeout_s or self.timeout_s
317
+
318
+ # Merge headers
319
+ headers = self.headers.copy()
320
+ if extra_headers:
321
+ headers.update(extra_headers)
322
+ # Always include X-API-Key for intra-app requests
323
+ try:
324
+ envk = os.getenv("ENVIRONMENT_API_KEY")
325
+ if envk and isinstance(envk, str):
326
+ headers["X-API-Key"] = envk
327
+ except Exception:
328
+ pass
329
+
330
+ # Set Authorization header based on the target URL
331
+ try:
332
+ low_url = (url or "").lower()
333
+
334
+ # If calling OpenAI directly (api.openai.com)
335
+ if "api.openai.com" in low_url:
336
+ openai_key = os.getenv("OPENAI_API_KEY")
337
+ if openai_key and isinstance(openai_key, str):
338
+ headers["Authorization"] = f"Bearer {openai_key}"
339
+
340
+ # If target is Synth backend (any deployment), use SYNTH_API_KEY
341
+ # Matches: synth-backend-*, agent-learning*, localhost:8000, 127.0.0.1:8000
342
+ elif any(pattern in low_url for pattern in [
343
+ "synth-backend", "synth.run", "agent-learning",
344
+ "localhost:8000", "127.0.0.1:8000"
345
+ ]):
346
+ synth_key = os.getenv("SYNTH_API_KEY")
347
+ if synth_key and isinstance(synth_key, str):
348
+ headers["Authorization"] = f"Bearer {synth_key}"
349
+
350
+ # If target is Groq, use GROQ_API_KEY
351
+ elif "/proxy/groq" in low_url or "api.groq.com" in low_url:
352
+ gk = os.getenv("GROQ_API_KEY")
353
+ if gk and isinstance(gk, str):
354
+ headers["Authorization"] = f"Bearer {gk}"
355
+ except Exception:
356
+ pass
357
+
358
+ # In-process proxy path: avoid HTTP round-trip and auth dependency
359
+ try:
360
+ if base.endswith("/proxy/groq") or base.endswith("/proxy/groq/"):
361
+ from synth_ai.task.server import prepare_for_groq, inject_system_hint
362
+ # Prepare payload similar to server-side proxy
363
+ model = request.get("model") if isinstance(request.get("model"), str) else None
364
+ payload = prepare_for_groq(model, request)
365
+ payload = inject_system_hint(payload, "")
366
+ # Call vendor directly
367
+ gk = os.getenv("GROQ_API_KEY") or ""
368
+ async with httpx.AsyncClient(timeout=timeout) as client:
369
+ resp = await client.post(
370
+ "https://api.groq.com/openai/v1/chat/completions",
371
+ json=payload,
372
+ headers={"Authorization": f"Bearer {gk}"},
373
+ )
374
+ resp.raise_for_status()
375
+ return resp.json()
376
+ except Exception as _local_proxy_err:
377
+ # Do NOT fall back silently; surface the error so callers fail fast
378
+ raise
379
+
380
+ # DEBUG: Log request BEFORE _fix_model_parameters
381
+ logger.debug(f"🔊 [OPENAI_CLIENT_PRE_FIX] Request message[1] content type: {type(request.get('messages', [])[1].get('content') if len(request.get('messages', [])) > 1 else None)}")
382
+ if len(request.get("messages", [])) > 1:
383
+ msg1_content = request["messages"][1].get("content")
384
+ logger.debug(f"🔊 [OPENAI_CLIENT_PRE_FIX] Message[1] content value: {msg1_content if not isinstance(msg1_content, list) else f'list[{len(msg1_content)}]'}")
385
+
386
+ # Fix parameter compatibility for newer models
387
+ processed_request = self._fix_model_parameters(request, target_url=url)
388
+
389
+ # DEBUG: Log request AFTER _fix_model_parameters
390
+ logger.debug(f"🔊 [OPENAI_CLIENT_POST_FIX] Processed message[1] content type: {type(processed_request.get('messages', [])[1].get('content') if len(processed_request.get('messages', [])) > 1 else None)}")
391
+ if len(processed_request.get("messages", [])) > 1:
392
+ msg1_content_post = processed_request["messages"][1].get("content")
393
+ logger.debug(f"🔊 [OPENAI_CLIENT_POST_FIX] Message[1] content value: {msg1_content_post if not isinstance(msg1_content_post, list) else f'list[{len(msg1_content_post)}]'}")
394
+
395
+ # Log request (redact messages in production)
396
+ # CRITICAL: Verify URL is correct BEFORE making HTTP request
397
+ final_parsed_check = urlparse(url)
398
+ logger.error(f"[URL_FINAL_CHECK] Before HTTP request: url={url} path={final_parsed_check.path} query={final_parsed_check.query}")
399
+
400
+ # CRASH IF URL IS STILL MALFORMED - DO NOT PROCEED
401
+ if final_parsed_check.query and "/" in final_parsed_check.query:
402
+ error_msg = (
403
+ f"FATAL [OpenAIClient]: URL IS STILL MALFORMED AFTER FIX ATTEMPT!\n"
404
+ f"Original base_url: {base_url or self.base_url}\n"
405
+ f"Constructed URL: {url}\n"
406
+ f"Path: {final_parsed_check.path}\n"
407
+ f"Query (contains path): {final_parsed_check.query}\n"
408
+ f"This will cause a 404 error. CRASHING NOW to prevent bad request."
409
+ )
410
+ logger.error(error_msg)
411
+ raise ValueError(error_msg)
412
+
413
+ # Verify path is correct
414
+ if not final_parsed_check.path.endswith("/v1/chat/completions"):
415
+ error_msg = (
416
+ f"FATAL [OpenAIClient]: URL missing /v1/chat/completions path!\n"
417
+ f"URL: {url}\n"
418
+ f"Path: {final_parsed_check.path}\n"
419
+ )
420
+ logger.error(error_msg)
421
+ raise ValueError(error_msg)
422
+
423
+ # Log request with detailed prompts/tools preview and sampling settings (Authorization is not logged)
424
+ logger.info(f"Inference POST target: {url}")
425
+ if extra_headers:
426
+ logger.info(f"Extra headers: {extra_headers}")
427
+ with contextlib.suppress(Exception):
428
+ keys_preview = sorted(processed_request.keys())
429
+ logger.info(f"Request keys: {keys_preview}")
430
+
431
+ # Detailed IO log: messages/tools/sampling and final payload fields
432
+ try:
433
+ import json as _json
434
+
435
+ def _truncate(text: str, limit: int = 2000) -> str:
436
+ return text if len(text) <= limit else text[:limit] + "…"
437
+
438
+ def _messages_preview(msgs: Any) -> str:
439
+ try:
440
+ out: list[dict[str, Any]] = []
441
+ if isinstance(msgs, list):
442
+ for m in msgs:
443
+ if not isinstance(m, dict):
444
+ continue
445
+ role = m.get("role")
446
+ content = m.get("content")
447
+ if isinstance(content, str):
448
+ text = content
449
+ elif isinstance(content, list):
450
+ parts: list[str] = []
451
+ for seg in content:
452
+ if isinstance(seg, dict) and isinstance(seg.get("text"), str):
453
+ parts.append(seg["text"])
454
+ text = "\n".join(parts)
455
+ else:
456
+ text = ""
457
+ out.append({"role": role, "content": _truncate(str(text), 4000)})
458
+ return _json.dumps(out)
459
+ except Exception:
460
+ return "[]"
461
+
462
+ def _tools_preview(tools: Any) -> str:
463
+ try:
464
+ return _truncate(_json.dumps(tools), 4000)
465
+ except Exception:
466
+ return "[]"
467
+
468
+ msgs = processed_request.get("messages") if isinstance(processed_request, dict) else None
469
+ tools = processed_request.get("tools") if isinstance(processed_request, dict) else None
470
+ io_log: dict[str, Any] = {
471
+ "llm.call": True,
472
+ "model": processed_request.get("model") if isinstance(processed_request, dict) else None,
473
+ "tool_choice": processed_request.get("tool_choice") if isinstance(processed_request, dict) else None,
474
+ "parallel_tool_calls": processed_request.get("parallel_tool_calls") if isinstance(processed_request, dict) else None,
475
+ "stop_after_tool_calls": processed_request.get("stop_after_tool_calls") if isinstance(processed_request, dict) else None,
476
+ "temperature": processed_request.get("temperature") if isinstance(processed_request, dict) else None,
477
+ "top_p": processed_request.get("top_p") if isinstance(processed_request, dict) else None,
478
+ "max_tokens": processed_request.get("max_tokens") if isinstance(processed_request, dict) else None,
479
+ "max_completion_tokens": processed_request.get("max_completion_tokens") if isinstance(processed_request, dict) else None,
480
+ "messages_preview": _messages_preview(msgs),
481
+ "tools_preview": _tools_preview(tools),
482
+ }
483
+ logger.info(io_log)
484
+ except Exception:
485
+ pass
486
+
487
+ # Final hard-guard for OpenAI/Groq: drop unsupported field
488
+ try:
489
+ low_url = url.lower()
490
+ if ("openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url) and "stop_after_tool_calls" in processed_request:
491
+ processed_request.pop("stop_after_tool_calls", None)
492
+ logger.info("Removed stop_after_tool_calls for %s request", "Groq/OpenAI")
493
+ # Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
494
+ if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
495
+ processed_request, dict
496
+ ):
497
+ rf = processed_request.get("response_format")
498
+ rf_type = None
499
+ if isinstance(rf, dict):
500
+ rf_type = str(rf.get("type") or "").lower()
501
+ if rf_type in {"json_object", "json_schema"}:
502
+ msgs = processed_request.get("messages")
503
+ has_json_word = False
504
+ if isinstance(msgs, list):
505
+ for m in msgs:
506
+ try:
507
+ content = m.get("content") if isinstance(m, dict) else None
508
+ text = None
509
+ if isinstance(content, str):
510
+ text = content
511
+ elif isinstance(content, list):
512
+ # Join any text segments
513
+ parts = []
514
+ for seg in content:
515
+ if isinstance(seg, dict) and isinstance(
516
+ seg.get("text"), str
517
+ ):
518
+ parts.append(seg["text"])
519
+ text = "\n".join(parts)
520
+ if isinstance(text, str) and ("json" in text.lower()):
521
+ has_json_word = True
522
+ break
523
+ except Exception:
524
+ continue
525
+ if not has_json_word:
526
+ try:
527
+ instruction = (
528
+ "Respond in strict JSON only. Output a single valid JSON object."
529
+ )
530
+ if not isinstance(msgs, list):
531
+ msgs = []
532
+ # Prepend a system message to satisfy Groq requirement without changing user intent
533
+ prepend = {"role": "system", "content": instruction}
534
+ processed_request["messages"] = [prepend] + list(msgs)
535
+ logger.info(
536
+ "Injected JSON-mode system instruction for Groq response_format compliance"
537
+ )
538
+ except Exception:
539
+ pass
540
+ except Exception:
541
+ pass
542
+
543
+ async with httpx.AsyncClient(timeout=timeout) as client:
544
+ try:
545
+ response = await client.post(
546
+ url,
547
+ json=processed_request,
548
+ headers=headers,
549
+ )
550
+ response.raise_for_status()
551
+
552
+ # Rich response diagnostics
553
+ content_type = response.headers.get("content-type")
554
+ body_text = response.text
555
+ logger.info(
556
+ f"Inference response status=200, content-type={content_type}, bytes={len(body_text)}"
557
+ )
558
+ if body_text:
559
+ # Log raw output with generous preview to debug no-tool-call issues
560
+ preview_len = min(4000, len(body_text))
561
+ logger.info({
562
+ "llm.raw_response": True,
563
+ "bytes": len(body_text),
564
+ "preview": body_text[:preview_len],
565
+ })
566
+
567
+ result = response.json()
568
+ logger.info(f"Inference response parsed_type={type(result).__name__}")
569
+
570
+ tool_call_count = -1
571
+ # Normalize tool calls so downstream always sees a function tool call
572
+ try:
573
+ if isinstance(result, dict):
574
+ choices = result.get("choices")
575
+ if isinstance(choices, list) and choices:
576
+ msg = choices[0].get("message")
577
+ if isinstance(msg, dict):
578
+ # Prefer tool_calls; if missing but function_call is present, synthesize tool_calls
579
+ tc = msg.get("tool_calls")
580
+ fc = msg.get("function_call")
581
+ if (not isinstance(tc, list) or not tc) and isinstance(fc, dict):
582
+ name = fc.get("name") or "interact_many"
583
+ args = fc.get("arguments") or "{}"
584
+ msg["tool_calls"] = [
585
+ {
586
+ "id": "call_norm",
587
+ "type": "function",
588
+ "function": {"name": name, "arguments": args},
589
+ }
590
+ ]
591
+ if isinstance(choices[0], dict):
592
+ choices[0]["finish_reason"] = "tool_calls"
593
+ # Log tool call count for debugging
594
+ try:
595
+ tc2 = msg.get("tool_calls")
596
+ count = len(tc2) if isinstance(tc2, list) else 0
597
+ logger.info({
598
+ "llm.tool_calls": True,
599
+ "count": count,
600
+ "finish_reason": choices[0].get("finish_reason") if isinstance(choices[0], dict) else None,
601
+ })
602
+ if count == 0:
603
+ click.echo(
604
+ "[openai-client] ✗ upstream response missing tool_calls; dumping preview to logs",
605
+ err=True,
606
+ )
607
+ logger.error(
608
+ "Inference response missing tool_calls; failing fast. Raw body preview: %s",
609
+ body_text[:500] if body_text else "<empty>",
610
+ )
611
+ raise ValueError("Inference response missing tool_calls")
612
+ tool_call_count = count
613
+ except Exception:
614
+ pass
615
+ except Exception:
616
+ pass
617
+
618
+ click.echo(
619
+ f"[openai-client] ✓ response ok with tool_calls={tool_call_count}",
620
+ err=True,
621
+ )
622
+ return result
623
+
624
+ except httpx.TimeoutException:
625
+ logger.error(f"Request to {url} timed out after {timeout}s")
626
+ raise
627
+ except httpx.HTTPStatusError as e:
628
+ status = e.response.status_code if e.response is not None else None
629
+ text = e.response.text if e.response is not None else str(e)
630
+ # Log full body and request diagnostics for debugging remote failures
631
+ try:
632
+ redacted_headers = dict(headers)
633
+ if "Authorization" in redacted_headers:
634
+ redacted_headers["Authorization"] = "***REDACTED***"
635
+ logger.error(
636
+ {
637
+ "openai_http_error": True,
638
+ "status": status,
639
+ "url": url,
640
+ "body": text,
641
+ }
642
+ )
643
+ logger.error(
644
+ {
645
+ "request_debug": True,
646
+ "status": status,
647
+ "target": url,
648
+ "headers": redacted_headers,
649
+ "payload": processed_request,
650
+ }
651
+ )
652
+ except Exception:
653
+ logger.error(f"HTTP error from {url}: {status} - {text}")
654
+ # Special case: token budget exceeded handled below, else 422 degrade, else re-raise
655
+ try:
656
+ if status == 400 and e.response is not None:
657
+ data = e.response.json()
658
+ detail = data.get("detail") if isinstance(data, dict) else None
659
+ err_code = (detail or {}).get("error") if isinstance(detail, dict) else None
660
+ if err_code == "token_budget_exceeded":
661
+ info = (detail or {}).get("details") or {}
662
+ messages_tokens = int(info.get("messages_tokens") or 0)
663
+ model_limit = int(info.get("model_limit") or 0)
664
+ safety = 64
665
+ # Compute a conservative new max_tokens
666
+ new_max = max(16, model_limit - messages_tokens - safety)
667
+ try:
668
+ # Update request and retry once immediately with smaller budget
669
+ if isinstance(processed_request, dict):
670
+ processed_request = dict(processed_request)
671
+ if "max_completion_tokens" in processed_request:
672
+ processed_request["max_completion_tokens"] = new_max
673
+ processed_request.pop("max_tokens", None)
674
+ else:
675
+ processed_request["max_tokens"] = new_max
676
+ # Remove optional fields that some servers reject
677
+ for k in ("thinking_mode", "thinking_budget", "reasoning"):
678
+ processed_request.pop(k, None)
679
+ # Force structured tool choice
680
+ if processed_request.get("tool_choice") == "required":
681
+ func_name = "run_command"
682
+ try:
683
+ tools_arr = processed_request.get("tools") or []
684
+ if isinstance(tools_arr, list) and tools_arr:
685
+ f = (
686
+ tools_arr[0].get("function")
687
+ if isinstance(tools_arr[0], dict)
688
+ else None
689
+ )
690
+ cand = (
691
+ (f or {}).get("name")
692
+ if isinstance(f, dict)
693
+ else None
694
+ )
695
+ if isinstance(cand, str) and cand:
696
+ func_name = cand
697
+ except Exception:
698
+ pass
699
+ processed_request["tool_choice"] = {
700
+ "type": "function",
701
+ "function": {"name": func_name},
702
+ }
703
+ processed_request["parallel_tool_calls"] = False
704
+ logger.warning(
705
+ {
706
+ "token_budget_recovery": True,
707
+ "messages_tokens": messages_tokens,
708
+ "model_limit": model_limit,
709
+ "retry_max_tokens": new_max,
710
+ }
711
+ )
712
+ # Retry once with reduced budget
713
+ async with httpx.AsyncClient(timeout=timeout) as client2:
714
+ r2 = await client2.post(
715
+ url, json=processed_request, headers=headers
716
+ )
717
+ r2.raise_for_status()
718
+ return r2.json()
719
+ except Exception:
720
+ pass
721
+ except Exception:
722
+ pass
723
+ raise
724
+ except Exception as e:
725
+ logger.error(f"Unexpected error calling {url}: {e}")
726
+ raise
727
+
728
async def check_health(
    self,
    base_url: str | None = None,
    timeout_s: float | None = None,
) -> dict[str, Any]:
    """
    Probe the inference service's ``/health`` endpoint.

    Args:
        base_url: Base URL to probe instead of ``self.base_url``
        timeout_s: Request timeout in seconds; 10.0 is used when omitted

    Returns:
        The JSON body of a successful response. On failure a dict is
        returned instead of raising: ``{"status": "overloaded", ...}`` when
        a 400 response reports an overloaded service, otherwise
        ``{"status": "unhealthy", "error": ...}``.
    """
    root = base_url or self.base_url
    health_url = root.rstrip("/") + "/health"
    effective_timeout = timeout_s or 10.0

    try:
        async with httpx.AsyncClient(timeout=effective_timeout) as session:
            reply = await session.get(health_url, headers=self.headers)
            reply.raise_for_status()
            return reply.json()
    except httpx.HTTPStatusError as http_err:
        if http_err.response.status_code == 400:
            # A 400 whose body says "overloaded" means the service is busy
            # but still responding; any failure to read the body falls
            # through to the generic "unhealthy" result below.
            with contextlib.suppress(Exception):
                payload = http_err.response.json()
                if payload.get("status") == "overloaded":
                    return {
                        "status": "overloaded",
                        "retry_after": payload.get("retry_after", 1),
                    }
        return {"status": "unhealthy", "error": str(http_err)}
    except Exception as other_err:
        return {"status": "unhealthy", "error": str(other_err)}
763
+
764
async def generate_with_retries(
    self,
    request: dict[str, Any],
    base_url: str | None = None,
    timeout_s: float | None = None,
    max_retries: int = 4,
    backoff_factor: float = 2.0,
    extra_headers: dict[str, str] | None = None,
) -> dict[str, Any]:
    """
    Generate with exponential backoff retries for transient errors.

    HTTP 429, 500 and 503 responses, timeouts, and 400 responses whose body
    reports ``"status": "overloaded"`` are retried. Any other 400 response
    is treated as a permanent failure.

    Args:
        request: OpenAI-compatible chat completion request
        base_url: Override base URL
        timeout_s: Override timeout
        max_retries: Maximum number of retry attempts
        backoff_factor: Exponential backoff multiplier
        extra_headers: Additional headers to include (e.g., X-Policy-Name)

    Returns:
        OpenAI-compatible chat completion response

    Raises:
        httpx.HTTPStatusError: For a status outside 400/429/500/503, or
            when retries are exhausted on a retryable status.
        httpx.TimeoutException: When retries are exhausted on timeouts.
        RuntimeError: For a 400 response that is not an overload signal
            (tool-call failure, other bad request, or unparseable body).
    """
    last_error: Exception | None = None
    processed_request: dict[str, Any] = dict(request or {})
    wait_time = 1.0

    # The target URL depends only on the base URL, so resolve it once rather
    # than on every attempt. Use URL parsing (not string concatenation) so a
    # query string on the base URL stays after the path.
    target_base = base_url or self.base_url
    target_url: str | None = None
    if target_base:
        parsed_target = urlparse(target_base)
        target_path = parsed_target.path.rstrip("/")
        if target_path.endswith("/v1/chat/completions"):
            target_url = target_base
        else:
            new_target_path = f"{target_path}/v1/chat/completions" if target_path else "/v1/chat/completions"
            target_url = urlunparse(parsed_target._replace(path=new_target_path))

    for attempt in range(max_retries + 1):
        try:
            # Apply parameter fixes to the request
            processed_request = self._fix_model_parameters(
                request,
                target_url=target_url,
            )
            return await self.generate(
                request=processed_request,
                base_url=base_url,
                timeout_s=timeout_s,
                extra_headers=extra_headers,
            )
        except httpx.HTTPStatusError as e:
            status = e.response.status_code
            # Retry on 400 (overloaded), 429 (rate limit), 500 (internal error), 503 (service unavailable)
            if status not in [400, 429, 500, 503]:
                raise
            last_error = e
            if status == 400:
                # Only the body parsing is guarded here. The RuntimeErrors
                # raised further down must NOT be caught by this handler,
                # otherwise every specific 400 failure would be re-labelled
                # as "(unparsed)".
                try:
                    response_data = e.response.json()
                    overloaded = response_data.get("status") == "overloaded"
                except Exception:
                    # If we can't parse the response, don't retry 400 errors
                    with contextlib.suppress(Exception):
                        logger.error(
                            {
                                "non_overload_400_unparsed": True,
                                "target": (base_url or self.base_url),
                                "payload": processed_request,
                            }
                        )
                    raise RuntimeError(
                        f"Inference 400 response (unparsed): {e.response.text if e.response is not None else 'Bad Request'}"
                    ) from e

                if overloaded:
                    retry_after = response_data.get("retry_after", 1)
                    # Prefer the server-suggested delay when it is longer
                    # than the current backoff; ignore a non-numeric hint.
                    try:
                        wait_time = max(wait_time, float(retry_after))
                    except (TypeError, ValueError):
                        pass
                    logger.warning(
                        f"Inference service overloaded (400). {response_data} Retrying after {wait_time}s..."
                    )
                else:
                    error_block = response_data.get("error")
                    error_code = ""
                    if isinstance(error_block, dict):
                        error_code = str(
                            error_block.get("code") or error_block.get("type") or ""
                        ).lower()
                    if error_code in {"tool_use_failed", "tool_call_failed"}:
                        with contextlib.suppress(Exception):
                            logger.error(
                                {
                                    "tool_use_failed": True,
                                    "target": (base_url or self.base_url),
                                    "message": error_block.get("message") if isinstance(error_block, dict) else None,
                                }
                            )
                        raise RuntimeError(
                            f"Inference 400 response (tool call failed): {error_block.get('message') if isinstance(error_block, dict) else 'Tool call failed'}"
                        ) from e
                    # This is a different type of 400 error, don't retry
                    with contextlib.suppress(Exception):
                        redacted_headers = {}
                        try:
                            redacted_headers = dict(self.headers)
                            if "Authorization" in redacted_headers:
                                redacted_headers["Authorization"] = "***REDACTED***"
                        except Exception:
                            redacted_headers = {}
                        logger.error(
                            {
                                "non_overload_400": True,
                                "target": (base_url or self.base_url),
                                "payload": processed_request,
                                "headers": redacted_headers,
                                "body": e.response.text if e.response is not None else None,
                            }
                        )
                    raise RuntimeError(
                        f"Inference 400 response: {e.response.text if e.response is not None else 'Bad Request'}"
                    ) from e
            elif status == 503:
                try:
                    preview = (e.response.text or "")[:200]
                except Exception:
                    preview = ""
                logger.warning(
                    f"Flash returned 503; container may be cold starting. Retrying... body={preview}"
                )
            elif status == 500:
                try:
                    preview = (e.response.text or "")[:200]
                except Exception:
                    preview = ""
                logger.warning(
                    f"Flash returned 500; inference service error. Retrying... body={preview}"
                )
        except httpx.TimeoutException as e:
            last_error = e

        if attempt < max_retries:
            logger.warning(
                f"Inference request failed (attempt {attempt + 1}/{max_retries + 1}), "
                f"retrying in {wait_time}s..."
            )
            await asyncio.sleep(wait_time)
            wait_time *= backoff_factor

    if last_error is not None:
        raise last_error
    raise RuntimeError("RL inference retries exhausted with no captured exception")
919
+
920
+
921
def create_inference_client(
    task_app: Any,
    api_key: str | None = None,
) -> OpenAIClient:
    """
    Create an inference client using TaskApp configuration.

    When the ``SYNTH_FAKE_INFERENCE`` environment variable is set to a
    non-blank value, a stand-in client is returned that never touches the
    network and always answers with a single canned ``interact_many`` tool
    call.

    Args:
        task_app: TaskApp instance with vllm_base_url
        api_key: Optional API key for authentication. When omitted, falls
            back to the ``OPENAI_API_KEY`` environment variable and then to
            ``task_app.openai_api_key`` if present.

    Returns:
        Configured OpenAIClient instance (or the stand-in described above)
    """
    # Local imports to avoid module-level side effects
    import json as _json
    import os as _os
    import time as _time

    # Fallback to environment if caller didn't provide an API key
    if api_key is None:
        try:
            api_key = _os.getenv("OPENAI_API_KEY") or getattr(task_app, "openai_api_key", None)
        except Exception:
            # Best effort only: an attribute lookup that raises leaves the key unset
            api_key = None

    if _os.getenv("SYNTH_FAKE_INFERENCE", "").strip():

        class _DummyClient:
            # The signature mirrors OpenAIClient.generate_with_retries
            # (including timeout_s) so callers written against the real
            # client work unchanged; every argument except `request` is
            # ignored.
            async def generate_with_retries(
                self,
                request: dict[str, Any],
                base_url: str | None = None,
                timeout_s: float | None = None,
                max_retries: int = 0,
                backoff_factor: float = 1.0,
                extra_headers: dict[str, str] | None = None,
            ) -> dict[str, Any]:
                tool_call = {
                    "id": "call_dummy",
                    "type": "function",
                    "function": {
                        "name": "interact_many",
                        "arguments": _json.dumps({"actions": ["move_right"]}),
                    },
                }
                # One timestamp so "id" and "created" always agree
                now = int(_time.time())
                return {
                    "id": f"cmpl-{now}",
                    "object": "chat.completion",
                    "created": now,
                    "model": request.get("model") or "dummy-model",
                    "choices": [
                        {
                            "index": 0,
                            "message": {
                                "role": "assistant",
                                "content": "",
                                "tool_calls": [tool_call],
                            },
                            "finish_reason": "tool_calls",
                        }
                    ],
                    "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
                }

            async def check_health(
                self,
                base_url: str | None = None,
                timeout_s: float | None = None,
            ) -> dict[str, Any]:
                return {"status": "ok", "dummy": True}

        import typing as _t

        # cast() is a runtime no-op; the string form avoids evaluating the
        # class name just to satisfy the type checker.
        return _t.cast("OpenAIClient", _DummyClient())

    return OpenAIClient(
        base_url=task_app.vllm_base_url,
        api_key=api_key,
    )