synth-ai 0.2.9.dev0__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (890) hide show
  1. examples/README.md +1 -0
  2. examples/__init__.py +16 -0
  3. examples/analyze_semantic_words.sh +17 -0
  4. examples/baseline/banking77_baseline.py +243 -0
  5. examples/baseline/banking77_pipeline_baseline.py +294 -0
  6. examples/baseline/crafter_baseline.py +407 -0
  7. examples/baseline/pokemon_red_baseline.py +326 -0
  8. examples/baseline/simple_baseline.py +56 -0
  9. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  10. examples/blog_posts/gepa/README.md +355 -0
  11. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  12. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
  13. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
  14. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
  15. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
  16. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
  17. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
  18. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
  19. examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
  20. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
  21. examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
  22. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
  23. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
  24. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
  25. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
  26. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
  27. examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
  28. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  29. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  30. examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
  31. examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
  32. examples/blog_posts/gepa/task_apps.py +105 -0
  33. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  34. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  35. examples/blog_posts/mipro/README.md +415 -0
  36. examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
  37. examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
  38. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
  39. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
  40. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
  41. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
  42. examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
  43. examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
  44. examples/blog_posts/mipro/multi_step.md +79 -0
  45. examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
  46. examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
  47. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
  48. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
  49. examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
  50. examples/blog_posts/pokemon_vl/README.md +98 -0
  51. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  52. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  53. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  54. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  55. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  56. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  57. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  58. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  59. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  60. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  61. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  62. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  63. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  64. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  65. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  66. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  67. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  68. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  69. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  70. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  71. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  72. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  73. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  74. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  75. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  76. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  77. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  78. examples/crafter_debug_render.py +186 -0
  79. examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
  80. examples/gepa/banking77_pipeline_gepa.toml +96 -0
  81. examples/gepa/multi_stage_gepa_example.toml +84 -0
  82. examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
  83. examples/multi_step/SFT_README.md +147 -0
  84. examples/multi_step/configs/README_verilog_rl.md +77 -0
  85. examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
  86. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
  87. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  88. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  89. examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
  90. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
  91. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
  92. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
  93. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  94. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  95. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  96. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  97. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  98. examples/multi_step/configs/verilog_rl_lora.toml +147 -0
  99. examples/multi_step/convert_traces_to_sft.py +84 -0
  100. examples/multi_step/crafter_rl_lora.md +70 -0
  101. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  102. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  103. examples/multi_step/readme.md +48 -0
  104. examples/multi_step/run_sft_qwen30b.sh +45 -0
  105. examples/multi_step/sse_metrics_streaming_notes.md +357 -0
  106. examples/multi_step/task_app_config_notes.md +494 -0
  107. examples/multi_step/verilog_rl_lora.md +218 -0
  108. examples/qwen_coder/README.md +102 -0
  109. examples/qwen_coder/_shared.py +113 -0
  110. examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
  111. examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
  112. examples/qwen_coder/configs/coder_lora_small.toml +57 -0
  113. examples/qwen_coder/generate_dataset.py +98 -0
  114. examples/qwen_coder/infer_ft_smoke.py +65 -0
  115. examples/qwen_coder/infer_prod_proxy.py +73 -0
  116. examples/qwen_coder/infer_via_synth.py +87 -0
  117. examples/qwen_coder/scripts/infer_coder.sh +19 -0
  118. examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
  119. examples/qwen_coder/sft_full_17b.py +103 -0
  120. examples/qwen_coder/sft_lora_30b.py +110 -0
  121. examples/qwen_coder/subset_jsonl.py +39 -0
  122. examples/qwen_coder/todos.md +38 -0
  123. examples/qwen_coder/validate_jsonl.py +60 -0
  124. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  125. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  126. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  127. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  128. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  129. examples/qwen_vl/QUICKSTART.md +327 -0
  130. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  131. examples/qwen_vl/README.md +152 -0
  132. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  133. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  134. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  135. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  136. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  137. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  138. examples/qwen_vl/__init__.py +2 -0
  139. examples/qwen_vl/collect_data_via_cli.md +415 -0
  140. examples/qwen_vl/collect_vision_traces.py +368 -0
  141. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  142. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  143. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  144. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  145. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  146. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  147. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  148. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  149. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  150. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  151. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  152. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  153. examples/qwen_vl/run_vision_comparison.sh +61 -0
  154. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  155. examples/qwen_vl/test_image_validation.py +201 -0
  156. examples/qwen_vl/test_sft_vision_data.py +110 -0
  157. examples/rl/README.md +169 -0
  158. examples/rl/configs/eval_base_qwen.toml +17 -0
  159. examples/rl/configs/eval_rl_qwen.toml +13 -0
  160. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  161. examples/rl/configs/rl_from_base_qwen17.toml +80 -0
  162. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  163. examples/rl/download_dataset.py +80 -0
  164. examples/rl/run_eval.py +436 -0
  165. examples/rl/run_rl_and_save.py +111 -0
  166. examples/rl/task_app/README.md +21 -0
  167. {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +188 -50
  168. examples/rl/task_app/math_task_app.py +111 -0
  169. examples/run_crafter_demo.sh +10 -0
  170. examples/sdk_prompt_learning_example.py +55 -0
  171. examples/sft/README.md +139 -0
  172. examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
  173. examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
  174. examples/sft/evaluate.py +117 -0
  175. examples/sft/export_dataset.py +120 -0
  176. examples/sft/generate_traces.py +164 -0
  177. examples/swe/__init__.py +12 -0
  178. examples/swe/task_app/README.md +135 -0
  179. examples/swe/task_app/__init__.py +2 -0
  180. examples/swe/task_app/grpo_swe_mini.py +604 -0
  181. examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
  182. examples/swe/task_app/hosted/README.md +173 -0
  183. examples/swe/task_app/hosted/__init__.py +5 -0
  184. examples/swe/task_app/hosted/branching.py +143 -0
  185. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  186. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  187. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  188. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  189. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  190. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  191. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  192. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  193. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  194. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  195. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
  196. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  197. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  198. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  199. examples/swe/task_app/hosted/hosted_app.py +204 -0
  200. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  201. examples/swe/task_app/hosted/inference/openai_client.py +584 -0
  202. examples/swe/task_app/hosted/main.py +100 -0
  203. examples/swe/task_app/hosted/policy_routes.py +1094 -0
  204. examples/swe/task_app/hosted/registry.py +195 -0
  205. examples/swe/task_app/hosted/rollout.py +1905 -0
  206. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  207. examples/swe/task_app/hosted/storage/volume.py +211 -0
  208. examples/swe/task_app/hosted/test_agents.py +161 -0
  209. examples/swe/task_app/hosted/test_service.py +136 -0
  210. examples/swe/task_app/hosted/utils.py +62 -0
  211. examples/swe/task_app/morph_backend.py +178 -0
  212. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  213. examples/task_apps/TESTING.md +275 -0
  214. examples/task_apps/banking77/__init__.py +6 -0
  215. examples/task_apps/banking77/banking77_task_app.py +912 -0
  216. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  217. examples/task_apps/banking77_pipeline/__init__.py +6 -0
  218. examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
  219. examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
  220. examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
  221. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  222. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
  223. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
  224. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  225. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  226. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  227. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  228. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  229. examples/task_apps/crafter/task_app/README.md +42 -0
  230. examples/task_apps/crafter/task_app/__init__.py +5 -0
  231. examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
  232. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
  233. examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
  234. examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
  235. examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
  236. examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  237. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  238. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  239. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  240. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
  241. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
  242. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
  243. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  244. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  245. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
  246. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  247. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
  248. examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
  249. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
  250. examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
  251. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
  252. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  253. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
  254. examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
  255. examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
  256. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
  257. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  258. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  259. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  260. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  261. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  262. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  263. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  264. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  265. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  266. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  267. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  268. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  269. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  270. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  271. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  272. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  273. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  274. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  275. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  276. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  277. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  278. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  279. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  280. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  281. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  282. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  283. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  284. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  285. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  286. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  287. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  288. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  289. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  290. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  291. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  292. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  293. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  294. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  295. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  296. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  297. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  298. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  299. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  300. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  301. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  302. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  303. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  304. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  305. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  306. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  307. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  308. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  309. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  310. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  311. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  312. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  313. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  314. examples/task_apps/enron/__init__.py +2 -0
  315. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  316. examples/task_apps/enron/filter_sft.toml +5 -0
  317. examples/task_apps/enron/task_app/README.md +14 -0
  318. examples/task_apps/enron/task_app/__init__.py +1 -0
  319. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  320. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  321. examples/task_apps/enron/tests/__init__.py +4 -0
  322. examples/task_apps/enron/tests/conftest.py +115 -0
  323. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  324. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  325. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  326. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  327. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  328. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  329. examples/task_apps/gepa_benchmarks/common.py +260 -0
  330. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  331. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  332. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  333. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  334. examples/task_apps/math/README.md +21 -0
  335. examples/task_apps/math/math_single_step.py +1000 -0
  336. examples/task_apps/math/math_task_app.py +115 -0
  337. examples/task_apps/pokemon_battle/__init__.py +2 -0
  338. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  339. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  340. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  341. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  342. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  343. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  344. examples/task_apps/pokemon_red/README.md +356 -0
  345. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
  346. examples/task_apps/pokemon_red/__init__.py +3 -0
  347. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
  348. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
  349. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  350. examples/task_apps/pokemon_red/task_app.py +1048 -0
  351. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  352. examples/task_apps/sokoban/README.md +306 -0
  353. examples/task_apps/sokoban/__init__.py +3 -0
  354. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  355. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  356. examples/task_apps/sokoban/filter_sft.toml +5 -0
  357. examples/task_apps/sokoban/task_app.py +1058 -0
  358. examples/task_apps/sokoban/tests/__init__.py +4 -0
  359. examples/task_apps/sokoban/tests/conftest.py +113 -0
  360. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  361. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  362. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  363. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  364. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  365. examples/task_apps/verilog/__init__.py +1 -0
  366. examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
  367. examples/task_apps/verilog/filter_sft.toml +5 -0
  368. examples/task_apps/verilog/task_app/README.md +12 -0
  369. examples/task_apps/verilog/task_app/__init__.py +1 -0
  370. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  371. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  372. examples/task_apps/verilog/tests/__init__.py +4 -0
  373. examples/task_apps/verilog/tests/conftest.py +115 -0
  374. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  375. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  376. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  377. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  378. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  379. examples/tunnel_gepa_banking77/README.md +106 -0
  380. examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
  381. examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
  382. examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
  383. examples/vlm/PROPOSAL.md +53 -0
  384. examples/vlm/README.md +68 -0
  385. examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
  386. examples/vlm/crafter_image_only_agent.py +207 -0
  387. examples/vlm/crafter_openai_vlm_agent.py +275 -0
  388. examples/vlm/filter_image_rows.py +63 -0
  389. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  390. examples/warming_up_to_rl/_utils.py +92 -0
  391. examples/warming_up_to_rl/analyze_trace_db.py +422 -0
  392. examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
  393. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
  394. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
  395. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
  396. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
  397. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
  398. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
  399. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
  400. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
  401. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
  402. examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
  403. examples/warming_up_to_rl/export_trace_sft.py +837 -0
  404. examples/warming_up_to_rl/groq_test.py +97 -0
  405. examples/warming_up_to_rl/manage_secrets.py +131 -0
  406. examples/warming_up_to_rl/old/event_rewards.md +234 -0
  407. examples/warming_up_to_rl/old/notes.md +73 -0
  408. examples/warming_up_to_rl/readme.md +110 -0
  409. examples/warming_up_to_rl/run_eval.py +736 -0
  410. examples/warming_up_to_rl/run_fft_and_save.py +380 -0
  411. examples/warming_up_to_rl/run_local_rollout.py +239 -0
  412. examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
  413. examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
  414. examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
  415. examples/warming_up_to_rl/run_rl_and_save.py +124 -0
  416. examples/warming_up_to_rl/run_rollout_remote.py +156 -0
  417. examples/warming_up_to_rl/task_app/README.md +42 -0
  418. examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
  419. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  420. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  421. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  422. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  423. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  424. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  425. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  426. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  427. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  428. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  429. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  430. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  431. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  432. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
  433. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  434. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
  435. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  436. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
  437. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  438. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
  439. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  440. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  441. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  442. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  443. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
  444. examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
  445. examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
  446. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
  447. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
  448. examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
  449. examples/workflows/math_rl/download_dataset.py +80 -0
  450. examples/workflows/math_rl/run_eval.py +436 -0
  451. examples/workflows/math_rl/run_rl_and_save.py +111 -0
  452. synth_ai/__init__.py +47 -23
  453. synth_ai/_utils/__init__.py +47 -0
  454. synth_ai/_utils/base_url.py +10 -0
  455. synth_ai/_utils/http.py +10 -0
  456. synth_ai/_utils/prompts.py +10 -0
  457. synth_ai/_utils/task_app_state.py +12 -0
  458. synth_ai/_utils/user_config.py +10 -0
  459. synth_ai/api/models/supported.py +514 -0
  460. synth_ai/api/train/__init__.py +60 -2
  461. synth_ai/api/train/builders.py +347 -39
  462. synth_ai/api/train/cli.py +895 -160
  463. synth_ai/api/train/config_finder.py +103 -25
  464. synth_ai/api/train/configs/__init__.py +65 -0
  465. synth_ai/api/train/configs/prompt_learning.py +496 -0
  466. synth_ai/api/train/configs/rl.py +188 -0
  467. synth_ai/api/train/configs/sft.py +99 -0
  468. synth_ai/api/train/configs/shared.py +81 -0
  469. synth_ai/api/train/env_resolver.py +70 -20
  470. synth_ai/api/train/pollers.py +29 -4
  471. synth_ai/api/train/prompt_learning.py +425 -0
  472. synth_ai/api/train/sft.py +390 -0
  473. synth_ai/api/train/supported_algos.py +147 -0
  474. synth_ai/api/train/task_app.py +6 -4
  475. synth_ai/api/train/utils.py +64 -52
  476. synth_ai/api/train/validators.py +1117 -0
  477. synth_ai/api/tunnel.py +49 -0
  478. synth_ai/auth/credentials.py +94 -0
  479. synth_ai/baseline/__init__.py +25 -0
  480. synth_ai/baseline/config.py +209 -0
  481. synth_ai/baseline/discovery.py +214 -0
  482. synth_ai/baseline/execution.py +146 -0
  483. synth_ai/cfgs.py +227 -0
  484. synth_ai/cli/__init__.py +85 -63
  485. synth_ai/cli/_modal_wrapper.py +31 -0
  486. synth_ai/cli/_storage.py +20 -0
  487. synth_ai/cli/_typer_patch.py +47 -0
  488. synth_ai/cli/_validate_task_app.py +29 -0
  489. synth_ai/cli/balance.py +16 -4
  490. synth_ai/cli/calc.py +36 -21
  491. synth_ai/cli/claude.py +70 -0
  492. synth_ai/cli/codex.py +267 -0
  493. synth_ai/cli/commands/__init__.py +18 -0
  494. synth_ai/cli/commands/baseline/__init__.py +12 -0
  495. synth_ai/cli/commands/baseline/core.py +637 -0
  496. synth_ai/cli/commands/baseline/list.py +93 -0
  497. synth_ai/cli/commands/demo/__init__.py +6 -0
  498. synth_ai/cli/commands/demo/core.py +163 -0
  499. synth_ai/cli/commands/eval/__init__.py +19 -0
  500. synth_ai/cli/commands/eval/core.py +1112 -0
  501. synth_ai/cli/commands/eval/errors.py +81 -0
  502. synth_ai/cli/commands/eval/validation.py +133 -0
  503. synth_ai/cli/commands/filter/__init__.py +12 -0
  504. synth_ai/cli/commands/filter/core.py +424 -0
  505. synth_ai/cli/commands/filter/errors.py +55 -0
  506. synth_ai/cli/commands/filter/validation.py +77 -0
  507. synth_ai/cli/commands/help/__init__.py +185 -0
  508. synth_ai/cli/commands/help/core.py +72 -0
  509. synth_ai/cli/commands/smoke/__init__.py +7 -0
  510. synth_ai/cli/commands/smoke/core.py +1437 -0
  511. synth_ai/cli/commands/status/__init__.py +66 -0
  512. synth_ai/cli/commands/status/client.py +192 -0
  513. synth_ai/cli/commands/status/config.py +92 -0
  514. synth_ai/cli/commands/status/errors.py +20 -0
  515. synth_ai/cli/commands/status/formatters.py +164 -0
  516. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  517. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  518. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  519. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  520. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  521. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  522. synth_ai/cli/commands/status/subcommands/session.py +183 -0
  523. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  524. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  525. synth_ai/cli/commands/status/utils.py +114 -0
  526. synth_ai/cli/commands/train/__init__.py +53 -0
  527. synth_ai/cli/commands/train/core.py +21 -0
  528. synth_ai/cli/commands/train/errors.py +117 -0
  529. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  530. synth_ai/cli/commands/train/judge_validation.py +305 -0
  531. synth_ai/cli/commands/train/validation.py +386 -0
  532. synth_ai/cli/demo.py +32 -140
  533. synth_ai/cli/deploy.py +233 -0
  534. synth_ai/cli/eval/__init__.py +36 -0
  535. synth_ai/cli/eval/core.py +5 -0
  536. synth_ai/cli/eval/errors.py +31 -0
  537. synth_ai/cli/eval/validation.py +5 -0
  538. synth_ai/cli/filter/__init__.py +28 -0
  539. synth_ai/cli/filter/core.py +5 -0
  540. synth_ai/cli/filter/errors.py +23 -0
  541. synth_ai/cli/filter/validation.py +5 -0
  542. synth_ai/cli/legacy_root_backup.py +28 -22
  543. synth_ai/cli/lib/__init__.py +10 -0
  544. synth_ai/cli/lib/task_app_discovery.py +7 -0
  545. synth_ai/cli/lib/task_app_env.py +518 -0
  546. synth_ai/cli/mcp.py +34 -0
  547. synth_ai/cli/modal_serve/__init__.py +12 -0
  548. synth_ai/cli/modal_serve/core.py +14 -0
  549. synth_ai/cli/modal_serve/errors.py +8 -0
  550. synth_ai/cli/modal_serve/validation.py +11 -0
  551. synth_ai/cli/opencode.py +256 -0
  552. synth_ai/cli/recent.py +13 -7
  553. synth_ai/cli/rl_demo.py +156 -116
  554. synth_ai/cli/root.py +131 -132
  555. synth_ai/cli/serve/__init__.py +12 -0
  556. synth_ai/cli/serve/core.py +14 -0
  557. synth_ai/cli/serve/errors.py +8 -0
  558. synth_ai/cli/serve/validation.py +11 -0
  559. synth_ai/cli/setup.py +49 -0
  560. synth_ai/cli/status.py +7 -125
  561. synth_ai/cli/task_app_deploy.py +7 -0
  562. synth_ai/cli/task_app_list.py +25 -0
  563. synth_ai/cli/task_app_modal_serve.py +11 -0
  564. synth_ai/cli/task_app_serve.py +11 -0
  565. synth_ai/cli/task_apps.py +2284 -257
  566. synth_ai/cli/traces.py +9 -5
  567. synth_ai/cli/train/__init__.py +12 -0
  568. synth_ai/cli/train/core.py +21 -0
  569. synth_ai/cli/train/errors.py +8 -0
  570. synth_ai/cli/train/validation.py +24 -0
  571. synth_ai/cli/train.py +5 -0
  572. synth_ai/cli/turso.py +73 -0
  573. synth_ai/cli/watch.py +13 -18
  574. synth_ai/demos/__init__.py +10 -0
  575. synth_ai/demos/core/__init__.py +28 -1
  576. synth_ai/demos/core/cli.py +579 -291
  577. synth_ai/demos/crafter/__init__.py +1 -0
  578. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  579. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  580. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  581. synth_ai/demos/demo_registry.py +176 -0
  582. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  583. synth_ai/demos/demo_task_apps/core.py +64 -28
  584. synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
  585. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
  586. synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
  587. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
  588. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  589. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  590. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  591. synth_ai/demos/demo_task_apps/math/modal_task_app.py +185 -83
  592. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
  593. synth_ai/demos/math/__init__.py +1 -0
  594. synth_ai/demos/math/_common.py +16 -0
  595. synth_ai/demos/math/app.py +38 -0
  596. synth_ai/demos/math/config.toml +76 -0
  597. synth_ai/demos/math/deploy_modal.py +54 -0
  598. synth_ai/demos/math/modal_task_app.py +703 -0
  599. synth_ai/demos/math/task_app_entry.py +51 -0
  600. synth_ai/environments/environment/core.py +7 -1
  601. synth_ai/environments/examples/bandit/engine.py +12 -5
  602. synth_ai/environments/examples/bandit/environment.py +0 -1
  603. synth_ai/environments/examples/bandit/taskset.py +4 -4
  604. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  605. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  606. synth_ai/environments/examples/crafter_classic/environment.py +93 -2
  607. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  608. synth_ai/environments/examples/enron/engine.py +7 -2
  609. synth_ai/environments/examples/enron/environment.py +68 -0
  610. synth_ai/environments/examples/red/engine.py +60 -12
  611. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  612. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  613. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  614. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  615. synth_ai/environments/examples/red/environment.py +86 -0
  616. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  617. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  618. synth_ai/environments/examples/verilog/engine.py +104 -12
  619. synth_ai/environments/examples/wordle/environment.py +0 -1
  620. synth_ai/environments/reproducibility/tree.py +5 -6
  621. synth_ai/environments/service/app.py +11 -12
  622. synth_ai/environments/service/core_routes.py +10 -9
  623. synth_ai/environments/stateful/engine.py +1 -1
  624. synth_ai/environments/tasks/core.py +1 -0
  625. synth_ai/environments/tasks/filters.py +5 -6
  626. synth_ai/environments/tasks/utils.py +4 -5
  627. synth_ai/evals/__init__.py +15 -0
  628. synth_ai/evals/base.py +14 -5
  629. synth_ai/evals/client.py +82 -0
  630. synth_ai/evals/types.py +42 -0
  631. synth_ai/http.py +8 -22
  632. synth_ai/http_client.py +45 -12
  633. synth_ai/inference/__init__.py +0 -2
  634. synth_ai/inference/client.py +21 -7
  635. synth_ai/jobs/client.py +129 -80
  636. synth_ai/judge_schemas.py +127 -0
  637. synth_ai/learning/__init__.py +51 -6
  638. synth_ai/learning/algorithms.py +14 -0
  639. synth_ai/learning/client.py +122 -30
  640. synth_ai/learning/config.py +2 -40
  641. synth_ai/learning/constants.py +0 -2
  642. synth_ai/learning/ft_client.py +4 -56
  643. synth_ai/learning/health.py +14 -8
  644. synth_ai/learning/jobs.py +43 -47
  645. synth_ai/learning/prompt_learning_client.py +276 -0
  646. synth_ai/learning/prompt_learning_types.py +185 -0
  647. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  648. synth_ai/learning/rl/client.py +269 -0
  649. synth_ai/learning/rl/config.py +31 -0
  650. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  651. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  652. synth_ai/learning/rl/secrets.py +13 -0
  653. synth_ai/learning/rl_client.py +2 -253
  654. synth_ai/learning/sft/__init__.py +29 -0
  655. synth_ai/learning/sft/client.py +68 -0
  656. synth_ai/learning/sft/config.py +270 -0
  657. synth_ai/learning/sft/data.py +698 -0
  658. synth_ai/learning/sse.py +25 -26
  659. synth_ai/learning/validators.py +29 -25
  660. synth_ai/mcp/__init__.py +5 -0
  661. synth_ai/mcp/__main__.py +8 -0
  662. synth_ai/mcp/main.py +254 -0
  663. synth_ai/mcp/setup.py +100 -0
  664. synth_ai/modal.py +257 -0
  665. synth_ai/pricing/__init__.py +3 -0
  666. synth_ai/pricing/model_pricing.py +64 -0
  667. synth_ai/session/__init__.py +75 -0
  668. synth_ai/session/client.py +383 -0
  669. synth_ai/session/constants.py +63 -0
  670. synth_ai/session/exceptions.py +105 -0
  671. synth_ai/session/manager.py +139 -0
  672. synth_ai/session/models.py +89 -0
  673. synth_ai/session/query.py +110 -0
  674. synth_ai/spec/__init__.py +46 -0
  675. synth_ai/spec/dataclasses.py +149 -0
  676. synth_ai/spec/loader.py +144 -0
  677. synth_ai/spec/serializer.py +199 -0
  678. synth_ai/spec/validation.py +250 -0
  679. synth_ai/streaming/__init__.py +29 -0
  680. synth_ai/streaming/config.py +94 -0
  681. synth_ai/streaming/handlers.py +589 -0
  682. synth_ai/streaming/streamer.py +320 -0
  683. synth_ai/streaming/types.py +95 -0
  684. synth_ai/task/__init__.py +50 -30
  685. synth_ai/task/apps/__init__.py +63 -19
  686. synth_ai/task/auth.py +35 -23
  687. synth_ai/task/client.py +15 -13
  688. synth_ai/task/config.py +261 -0
  689. synth_ai/task/contracts.py +165 -64
  690. synth_ai/task/datasets.py +9 -6
  691. synth_ai/task/errors.py +11 -10
  692. synth_ai/task/health.py +17 -11
  693. synth_ai/task/inference_api.py +101 -0
  694. synth_ai/task/json.py +58 -24
  695. synth_ai/task/proxy.py +59 -66
  696. synth_ai/task/rubrics/__init__.py +55 -0
  697. synth_ai/task/rubrics/loaders.py +156 -0
  698. synth_ai/task/rubrics/models.py +57 -0
  699. synth_ai/task/rubrics/scoring.py +116 -0
  700. synth_ai/task/rubrics/strict.py +149 -0
  701. synth_ai/task/rubrics.py +22 -15
  702. synth_ai/task/server.py +65 -31
  703. synth_ai/task/trace_correlation_helpers.py +328 -0
  704. synth_ai/task/tracing_utils.py +44 -28
  705. synth_ai/task/validators.py +449 -6
  706. synth_ai/task/vendors.py +5 -7
  707. synth_ai/tracing_v3/__init__.py +4 -0
  708. synth_ai/tracing_v3/abstractions.py +21 -4
  709. synth_ai/tracing_v3/config.py +167 -22
  710. synth_ai/tracing_v3/constants.py +21 -0
  711. synth_ai/tracing_v3/db_config.py +42 -29
  712. synth_ai/tracing_v3/decorators.py +80 -45
  713. synth_ai/tracing_v3/examples/basic_usage.py +15 -9
  714. synth_ai/tracing_v3/hooks.py +6 -4
  715. synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
  716. synth_ai/tracing_v3/migration_helper.py +1 -2
  717. synth_ai/tracing_v3/replica_sync.py +12 -7
  718. synth_ai/tracing_v3/serialization.py +130 -0
  719. synth_ai/tracing_v3/session_tracer.py +73 -16
  720. synth_ai/tracing_v3/storage/base.py +89 -1
  721. synth_ai/tracing_v3/storage/config.py +63 -16
  722. synth_ai/tracing_v3/storage/factory.py +11 -9
  723. synth_ai/tracing_v3/storage/utils.py +15 -11
  724. synth_ai/tracing_v3/trace_utils.py +317 -0
  725. synth_ai/tracing_v3/turso/__init__.py +8 -21
  726. synth_ai/tracing_v3/turso/daemon.py +123 -15
  727. synth_ai/tracing_v3/turso/models.py +5 -2
  728. synth_ai/tracing_v3/turso/native_manager.py +1293 -0
  729. synth_ai/tracing_v3/utils.py +5 -4
  730. synth_ai/tunnel.py +143 -0
  731. synth_ai/tunnel_deploy.py +278 -0
  732. synth_ai/types.py +8 -0
  733. synth_ai/urls.py +11 -0
  734. synth_ai/utils/__init__.py +166 -0
  735. synth_ai/utils/agents.py +74 -0
  736. synth_ai/utils/apps.py +152 -0
  737. synth_ai/utils/base_url.py +94 -0
  738. synth_ai/utils/bin.py +39 -0
  739. synth_ai/utils/claude.py +36 -0
  740. synth_ai/utils/cli.py +284 -0
  741. synth_ai/utils/config.py +81 -0
  742. synth_ai/utils/env.py +346 -0
  743. synth_ai/utils/errors.py +85 -0
  744. synth_ai/utils/http.py +172 -0
  745. synth_ai/utils/json.py +72 -0
  746. synth_ai/utils/log_filter.py +99 -0
  747. synth_ai/utils/logging.py +198 -0
  748. synth_ai/utils/modal.py +299 -0
  749. synth_ai/utils/paths.py +95 -0
  750. synth_ai/utils/process.py +233 -0
  751. synth_ai/utils/prompts.py +39 -0
  752. synth_ai/utils/sqld.py +122 -0
  753. synth_ai/utils/ssl.py +25 -0
  754. synth_ai/utils/task_app_discovery.py +882 -0
  755. synth_ai/utils/task_app_env.py +186 -0
  756. synth_ai/utils/task_app_state.py +318 -0
  757. synth_ai/utils/tunnel/__init__.py +12 -0
  758. synth_ai/utils/tunnel/config.py +55 -0
  759. synth_ai/utils/user_config.py +137 -0
  760. synth_ai/uvicorn.py +77 -0
  761. synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
  762. synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
  763. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
  764. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
  765. synth_ai/cli/man.py +0 -106
  766. synth_ai/core/experiment.py +0 -15
  767. synth_ai/core/system.py +0 -15
  768. synth_ai/demo_registry.py +0 -258
  769. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  770. synth_ai/experimental/synth_oss.py +0 -446
  771. synth_ai/handshake.py +0 -107
  772. synth_ai/install_sqld.sh +0 -40
  773. synth_ai/learning/offline/dpo.py +0 -0
  774. synth_ai/learning/offline/providers.py +0 -7
  775. synth_ai/learning/offline/sft.py +0 -0
  776. synth_ai/learning/offline/shared.py +0 -0
  777. synth_ai/learning/online/grpo.py +0 -0
  778. synth_ai/learning/online/irft.py +0 -0
  779. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  780. synth_ai/learning/prompts/gepa.py +0 -0
  781. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  782. synth_ai/learning/prompts/mipro.py +0 -289
  783. synth_ai/learning/prompts/random_search.py +0 -246
  784. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  785. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  786. synth_ai/lm/__init__.py +0 -51
  787. synth_ai/lm/caching/constants.py +0 -6
  788. synth_ai/lm/caching/dbs.py +0 -0
  789. synth_ai/lm/caching/ephemeral.py +0 -102
  790. synth_ai/lm/caching/handler.py +0 -137
  791. synth_ai/lm/caching/initialize.py +0 -11
  792. synth_ai/lm/caching/persistent.py +0 -114
  793. synth_ai/lm/config.py +0 -110
  794. synth_ai/lm/constants.py +0 -32
  795. synth_ai/lm/core/__init__.py +0 -8
  796. synth_ai/lm/core/all.py +0 -73
  797. synth_ai/lm/core/exceptions.py +0 -7
  798. synth_ai/lm/core/main.py +0 -319
  799. synth_ai/lm/core/main_v3.py +0 -594
  800. synth_ai/lm/core/synth_models.py +0 -48
  801. synth_ai/lm/core/vendor_clients.py +0 -188
  802. synth_ai/lm/cost/monitor.py +0 -1
  803. synth_ai/lm/cost/statefulness.py +0 -1
  804. synth_ai/lm/injection.py +0 -80
  805. synth_ai/lm/overrides.py +0 -206
  806. synth_ai/lm/provider_support/__init__.py +0 -8
  807. synth_ai/lm/provider_support/anthropic.py +0 -972
  808. synth_ai/lm/provider_support/openai.py +0 -1139
  809. synth_ai/lm/provider_support/suppress_logging.py +0 -31
  810. synth_ai/lm/structured_outputs/handler.py +0 -440
  811. synth_ai/lm/structured_outputs/inject.py +0 -297
  812. synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
  813. synth_ai/lm/tools/__init__.py +0 -3
  814. synth_ai/lm/tools/base.py +0 -172
  815. synth_ai/lm/unified_interface.py +0 -202
  816. synth_ai/lm/vendors/base.py +0 -81
  817. synth_ai/lm/vendors/core/anthropic_api.py +0 -387
  818. synth_ai/lm/vendors/core/gemini_api.py +0 -292
  819. synth_ai/lm/vendors/core/mistral_api.py +0 -322
  820. synth_ai/lm/vendors/core/openai_api.py +0 -225
  821. synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
  822. synth_ai/lm/vendors/local/ollama.py +0 -0
  823. synth_ai/lm/vendors/openai_standard.py +0 -780
  824. synth_ai/lm/vendors/openai_standard_responses.py +0 -256
  825. synth_ai/lm/vendors/retries.py +0 -22
  826. synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
  827. synth_ai/lm/vendors/supported/deepseek.py +0 -69
  828. synth_ai/lm/vendors/supported/grok.py +0 -75
  829. synth_ai/lm/vendors/supported/groq.py +0 -16
  830. synth_ai/lm/vendors/supported/ollama.py +0 -15
  831. synth_ai/lm/vendors/supported/openrouter.py +0 -74
  832. synth_ai/lm/vendors/supported/together.py +0 -11
  833. synth_ai/lm/vendors/synth_client.py +0 -808
  834. synth_ai/lm/warmup.py +0 -186
  835. synth_ai/rl/secrets.py +0 -19
  836. synth_ai/scripts/verify_rewards.py +0 -100
  837. synth_ai/task/apps/grpo_crafter.py +0 -438
  838. synth_ai/tracing/__init__.py +0 -30
  839. synth_ai/tracing_v1/__init__.py +0 -33
  840. synth_ai/tracing_v3/turso/manager.py +0 -774
  841. synth_ai/v0/tracing/abstractions.py +0 -224
  842. synth_ai/v0/tracing/base_client.py +0 -91
  843. synth_ai/v0/tracing/client_manager.py +0 -131
  844. synth_ai/v0/tracing/config.py +0 -142
  845. synth_ai/v0/tracing/context.py +0 -146
  846. synth_ai/v0/tracing/decorators.py +0 -682
  847. synth_ai/v0/tracing/events/__init__.py +0 -0
  848. synth_ai/v0/tracing/events/manage.py +0 -147
  849. synth_ai/v0/tracing/events/scope.py +0 -86
  850. synth_ai/v0/tracing/events/store.py +0 -228
  851. synth_ai/v0/tracing/immediate_client.py +0 -151
  852. synth_ai/v0/tracing/local.py +0 -18
  853. synth_ai/v0/tracing/log_client_base.py +0 -73
  854. synth_ai/v0/tracing/retry_queue.py +0 -186
  855. synth_ai/v0/tracing/trackers.py +0 -515
  856. synth_ai/v0/tracing/upload.py +0 -512
  857. synth_ai/v0/tracing/utils.py +0 -9
  858. synth_ai/v0/tracing_v1/__init__.py +0 -16
  859. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  860. synth_ai/v0/tracing_v1/base_client.py +0 -91
  861. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  862. synth_ai/v0/tracing_v1/config.py +0 -142
  863. synth_ai/v0/tracing_v1/context.py +0 -146
  864. synth_ai/v0/tracing_v1/decorators.py +0 -703
  865. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  866. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  867. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  868. synth_ai/v0/tracing_v1/events/store.py +0 -228
  869. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  870. synth_ai/v0/tracing_v1/local.py +0 -18
  871. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  872. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  873. synth_ai/v0/tracing_v1/trackers.py +0 -515
  874. synth_ai/v0/tracing_v1/upload.py +0 -527
  875. synth_ai/v0/tracing_v1/utils.py +0 -9
  876. synth_ai/zyk/__init__.py +0 -30
  877. synth_ai-0.2.9.dev0.dist-info/METADATA +0 -131
  878. synth_ai-0.2.9.dev0.dist-info/RECORD +0 -444
  879. {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
  880. {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
  881. {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
  882. {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
  883. {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
  884. {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
  885. {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
  886. {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
  887. /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
  888. /synth_ai/{learning/filtering.py → py.typed} +0 -0
  889. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
  890. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,729 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import contextlib
5
+ import logging
6
+ import os
7
+ from typing import Any
8
+
9
+ import httpx
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+
14
+ class OpenAIClient:
15
+ """Async HTTP client for OpenAI-compatible inference servers (vLLM)."""
16
+
17
+ def __init__(
18
+ self,
19
+ base_url: str,
20
+ api_key: str | None = None,
21
+ timeout_s: float = 120.0,
22
+ ) -> None:
23
+ self.base_url = base_url.rstrip("/")
24
+ self.api_key = api_key
25
+ self.timeout_s = timeout_s
26
+ self.headers = {}
27
+ self._env_api_key: str | None = None
28
+
29
+ try:
30
+ env_key = os.getenv("ENVIRONMENT_API_KEY") or ""
31
+ env_key = env_key.strip()
32
+ if env_key:
33
+ self._env_api_key = env_key
34
+ except Exception:
35
+ self._env_api_key = None
36
+
37
+ if api_key:
38
+ self.headers["Authorization"] = f"Bearer {api_key}"
39
+
40
+ def _fix_model_parameters(
41
+ self, request: dict[str, Any], target_url: str | None = None
42
+ ) -> dict[str, Any]:
43
+ """
44
+ Fix parameter compatibility for newer OpenAI models.
45
+
46
+ Newer models like gpt-5-nano use 'max_completion_tokens' instead of 'max_tokens'.
47
+ """
48
+ if not request:
49
+ return request
50
+
51
+ # Make a copy to avoid modifying the original
52
+ fixed_request = request.copy()
53
+
54
+ # Determine if target is OpenAI-compatible (OpenAI, Azure OpenAI, Groq);
55
+ # strip fields those endpoints don't accept
56
+ is_openai = False
57
+ try:
58
+ if isinstance(target_url, str):
59
+ low = target_url.lower()
60
+ is_openai = (
61
+ ("openai.com" in low)
62
+ or ("azure" in low and ".openai." in low)
63
+ or ("groq.com" in low)
64
+ or ("/openai" in low)
65
+ )
66
+ except Exception:
67
+ is_openai = False
68
+
69
+ model = fixed_request.get("model", "")
70
+
71
+ if is_openai:
72
+ # Remove fields OpenAI/Groq don't accept
73
+ for k in (
74
+ "stop_after_tool_calls",
75
+ "thinking_mode",
76
+ "thinking_budget",
77
+ "reasoning",
78
+ "extra_body",
79
+ "parallel_tool_calls",
80
+ "function_call",
81
+ ):
82
+ if k in fixed_request:
83
+ fixed_request.pop(k, None)
84
+
85
+ # GPT-5 family specifics
86
+ if "gpt-5" in model or "gpt-4.1" in model:
87
+ # Convert max_tokens to max_completion_tokens for newer models
88
+ if "max_tokens" in fixed_request:
89
+ if "max_completion_tokens" not in fixed_request:
90
+ fixed_request["max_completion_tokens"] = fixed_request.pop("max_tokens")
91
+ logger.info(
92
+ f"Converted max_tokens to max_completion_tokens for model {model}"
93
+ )
94
+ else:
95
+ fixed_request.pop("max_tokens")
96
+ logger.info(f"Removed conflicting max_tokens parameter for model {model}")
97
+ # Some OpenAI endpoints ignore/deny sampling fields for reasoning models
98
+ for k in ("temperature", "top_p"):
99
+ if k in fixed_request:
100
+ fixed_request.pop(k, None)
101
+ # If tools are present, force single tool choice to our function
102
+ try:
103
+ tools = fixed_request.get("tools")
104
+ if isinstance(tools, list) and tools:
105
+ # Choose the first provided function name from tools schema (e.g., run_command)
106
+ func_name = None
107
+ for t in tools:
108
+ try:
109
+ cand = None
110
+ if isinstance(t, dict):
111
+ f = t.get("function")
112
+ if isinstance(f, dict):
113
+ cand = f.get("name")
114
+ if isinstance(cand, str) and cand:
115
+ func_name = cand
116
+ break
117
+ except Exception:
118
+ continue
119
+ if not func_name:
120
+ func_name = "run_command"
121
+ fixed_request["tool_choice"] = {
122
+ "type": "function",
123
+ "function": {"name": func_name},
124
+ }
125
+ fixed_request["parallel_tool_calls"] = False
126
+ except Exception:
127
+ pass
128
+
129
+ return fixed_request
130
+
131
+ async def generate(
132
+ self,
133
+ request: dict[str, Any],
134
+ base_url: str | None = None,
135
+ timeout_s: float | None = None,
136
+ extra_headers: dict[str, str] | None = None,
137
+ ) -> dict[str, Any]:
138
+ """
139
+ Send a chat completion request to the inference server.
140
+
141
+ Args:
142
+ request: OpenAI-compatible chat completion request
143
+ base_url: Override base URL for this request
144
+ timeout_s: Override timeout for this request
145
+ extra_headers: Additional headers to include (e.g., X-Policy-Name)
146
+
147
+ Returns:
148
+ OpenAI-compatible chat completion response
149
+ """
150
+ # Build target URL robustly: if a full endpoint is given (with query or already ending
151
+ # in /chat/completions), preserve it; otherwise, append the path BEFORE query params.
152
+ from urllib.parse import urlparse, urlunparse
153
+
154
+ candidate = (base_url or self.base_url).strip()
155
+ try:
156
+ parsed = urlparse(candidate)
157
+ # If no scheme, treat as relative base (pass-through)
158
+ if not parsed.scheme or not parsed.netloc:
159
+ base_no_slash = candidate.rstrip("/")
160
+ url = f"{base_no_slash}/v1/chat/completions"
161
+ else:
162
+ path = (parsed.path or "").rstrip("/")
163
+ if path.endswith("/v1/chat/completions") or path.endswith("/chat/completions"):
164
+ new_path = path
165
+ elif path.endswith("/v1"):
166
+ new_path = f"{path}/chat/completions"
167
+ elif path.endswith("/chat"):
168
+ new_path = f"{path}/completions"
169
+ else:
170
+ new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
171
+ url = urlunparse(parsed._replace(path=new_path))
172
+ except Exception:
173
+ # Fallback to legacy behavior
174
+ url = (base_url or self.base_url).rstrip("/") + "/v1/chat/completions"
175
+ timeout = timeout_s or self.timeout_s
176
+
177
+ # Merge headers
178
+ headers = self.headers.copy()
179
+ try:
180
+ parsed_target = urlparse(url)
181
+ path_for_auth = (parsed_target.path or "") if parsed_target else ""
182
+ if self._env_api_key and "/proxy/" in path_for_auth:
183
+ headers.setdefault("X-API-Key", self._env_api_key)
184
+ except Exception:
185
+ pass
186
+ if extra_headers:
187
+ headers.update(extra_headers)
188
+
189
+ # Fix parameter compatibility for newer models
190
+ processed_request = self._fix_model_parameters(request, target_url=url)
191
+
192
+ # Log request with detailed prompts/tools preview and sampling settings (Authorization is not logged)
193
+ logger.info(f"Inference POST target: {url}")
194
+ if extra_headers:
195
+ logger.info(f"Extra headers: {extra_headers}")
196
+ with contextlib.suppress(Exception):
197
+ keys_preview = sorted(processed_request.keys())
198
+ logger.info(f"Request keys: {keys_preview}")
199
+
200
+ # Detailed IO log: messages/tools/sampling and final payload fields
201
+ try:
202
+ import json as _json
203
+
204
+ def _truncate(text: str, limit: int = 2000) -> str:
205
+ return text if len(text) <= limit else text[:limit] + "…"
206
+
207
+ def _messages_preview(msgs: Any) -> str:
208
+ try:
209
+ out: list[dict[str, Any]] = []
210
+ if isinstance(msgs, list):
211
+ for m in msgs:
212
+ if not isinstance(m, dict):
213
+ continue
214
+ role = m.get("role")
215
+ content = m.get("content")
216
+ if isinstance(content, str):
217
+ text = content
218
+ elif isinstance(content, list):
219
+ parts: list[str] = []
220
+ for seg in content:
221
+ if isinstance(seg, dict) and isinstance(seg.get("text"), str):
222
+ parts.append(seg["text"])
223
+ text = "\n".join(parts)
224
+ else:
225
+ text = ""
226
+ out.append({"role": role, "content": _truncate(str(text), 4000)})
227
+ return _json.dumps(out)
228
+ except Exception:
229
+ return "[]"
230
+
231
+ def _tools_preview(tools: Any) -> str:
232
+ try:
233
+ return _truncate(_json.dumps(tools), 4000)
234
+ except Exception:
235
+ return "[]"
236
+
237
+ msgs = processed_request.get("messages") if isinstance(processed_request, dict) else None
238
+ tools = processed_request.get("tools") if isinstance(processed_request, dict) else None
239
+ io_log: dict[str, Any] = {
240
+ "llm.call": True,
241
+ "model": processed_request.get("model") if isinstance(processed_request, dict) else None,
242
+ "tool_choice": processed_request.get("tool_choice") if isinstance(processed_request, dict) else None,
243
+ "parallel_tool_calls": processed_request.get("parallel_tool_calls") if isinstance(processed_request, dict) else None,
244
+ "stop_after_tool_calls": processed_request.get("stop_after_tool_calls") if isinstance(processed_request, dict) else None,
245
+ "temperature": processed_request.get("temperature") if isinstance(processed_request, dict) else None,
246
+ "top_p": processed_request.get("top_p") if isinstance(processed_request, dict) else None,
247
+ "max_tokens": processed_request.get("max_tokens") if isinstance(processed_request, dict) else None,
248
+ "max_completion_tokens": processed_request.get("max_completion_tokens") if isinstance(processed_request, dict) else None,
249
+ "messages_preview": _messages_preview(msgs),
250
+ "tools_preview": _tools_preview(tools),
251
+ }
252
+ logger.info(io_log)
253
+ except Exception:
254
+ pass
255
+
256
+ # Final hard-guard for OpenAI/Groq: ensure unsupported field is not present
257
+ try:
258
+ low_url = url.lower()
259
+ if ("openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url) and "stop_after_tool_calls" in processed_request:
260
+ processed_request.pop("stop_after_tool_calls", None)
261
+ logger.info("Removed stop_after_tool_calls for Groq/OpenAI request")
262
+ # Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
263
+ if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
264
+ processed_request, dict
265
+ ):
266
+ rf = processed_request.get("response_format")
267
+ rf_type = None
268
+ if isinstance(rf, dict):
269
+ rf_type = str(rf.get("type") or "").lower()
270
+ if rf_type in {"json_object", "json_schema"}:
271
+ msgs = processed_request.get("messages")
272
+ has_json_word = False
273
+ if isinstance(msgs, list):
274
+ for m in msgs:
275
+ try:
276
+ content = m.get("content") if isinstance(m, dict) else None
277
+ text = None
278
+ if isinstance(content, str):
279
+ text = content
280
+ elif isinstance(content, list):
281
+ # Join any text segments
282
+ parts = []
283
+ for seg in content:
284
+ if isinstance(seg, dict) and isinstance(
285
+ seg.get("text"), str
286
+ ):
287
+ parts.append(seg["text"])
288
+ text = "\n".join(parts)
289
+ if isinstance(text, str) and ("json" in text.lower()):
290
+ has_json_word = True
291
+ break
292
+ except Exception:
293
+ continue
294
+ if not has_json_word:
295
+ try:
296
+ instruction = (
297
+ "Respond in strict JSON only. Output a single valid JSON object."
298
+ )
299
+ if not isinstance(msgs, list):
300
+ msgs = []
301
+ # Prepend a system message to satisfy Groq requirement without changing user intent
302
+ prepend = {"role": "system", "content": instruction}
303
+ processed_request["messages"] = [prepend] + list(msgs)
304
+ logger.info(
305
+ "Injected JSON-mode system instruction for Groq response_format compliance"
306
+ )
307
+ except Exception:
308
+ pass
309
+ except Exception:
310
+ pass
311
+
312
+ async with httpx.AsyncClient(timeout=timeout) as client:
313
+ try:
314
+ response = await client.post(
315
+ url,
316
+ json=processed_request,
317
+ headers=headers,
318
+ )
319
+ response.raise_for_status()
320
+
321
+ # Rich response diagnostics
322
+ content_type = response.headers.get("content-type")
323
+ body_text = response.text
324
+ logger.info(
325
+ f"Inference response status=200, content-type={content_type}, bytes={len(body_text)}"
326
+ )
327
+ if body_text:
328
+ # Log raw output with generous preview to debug no-tool-call issues
329
+ preview_len = min(4000, len(body_text))
330
+ logger.info({
331
+ "llm.raw_response": True,
332
+ "bytes": len(body_text),
333
+ "preview": body_text[:preview_len],
334
+ })
335
+
336
+ result = response.json()
337
+ logger.info(f"Inference response parsed_type={type(result).__name__}")
338
+
339
+ # Normalize tool calls so downstream always sees a function tool call
340
+ try:
341
+ if isinstance(result, dict):
342
+ choices = result.get("choices")
343
+ if isinstance(choices, list) and choices:
344
+ msg = choices[0].get("message")
345
+ if isinstance(msg, dict):
346
+ # Prefer tool_calls; if missing but function_call is present, synthesize tool_calls
347
+ tc = msg.get("tool_calls")
348
+ fc = msg.get("function_call")
349
+ if (not isinstance(tc, list) or not tc) and isinstance(fc, dict):
350
+ name = fc.get("name") or "interact_many"
351
+ args = fc.get("arguments") or "{}"
352
+ msg["tool_calls"] = [
353
+ {
354
+ "id": "call_norm",
355
+ "type": "function",
356
+ "function": {"name": name, "arguments": args},
357
+ }
358
+ ]
359
+ # Encourage downstream to treat this as a tool call
360
+ if isinstance(choices[0], dict):
361
+ choices[0]["finish_reason"] = "tool_calls"
362
+ # Log tool call count for debugging
363
+ try:
364
+ tc2 = msg.get("tool_calls")
365
+ count = len(tc2) if isinstance(tc2, list) else 0
366
+ logger.info({
367
+ "llm.tool_calls": True,
368
+ "count": count,
369
+ "finish_reason": choices[0].get("finish_reason") if isinstance(choices[0], dict) else None,
370
+ })
371
+ except Exception:
372
+ pass
373
+ except Exception:
374
+ pass
375
+
376
+ return result
377
+
378
+ except httpx.TimeoutException:
379
+ logger.error(f"Request to {url} timed out after {timeout}s")
380
+ raise
381
+ except httpx.HTTPStatusError as e:
382
+ status = e.response.status_code if e.response is not None else None
383
+ text = e.response.text if e.response is not None else str(e)
384
+ # Log full body for debugging remote failures
385
+ try:
386
+ logger.error(
387
+ {
388
+ "openai_http_error": True,
389
+ "status": status,
390
+ "url": url,
391
+ "body": text,
392
+ }
393
+ )
394
+ except Exception:
395
+ logger.error(f"HTTP error from {url}: {status} - {text}")
396
+ # For 4xx/5xx, print full sanitized request to aid debugging (especially Groq 400s)
397
+ try:
398
+ redacted_headers = dict(headers)
399
+ if "Authorization" in redacted_headers:
400
+ redacted_headers["Authorization"] = "***REDACTED***"
401
+ logger.error(
402
+ {
403
+ "request_debug": True,
404
+ "status": status,
405
+ "target": url,
406
+ "headers": redacted_headers,
407
+ "payload": processed_request,
408
+ }
409
+ )
410
+ except Exception:
411
+ pass
412
+ # Special case: token budget exceeded (OpenAI-compatible error schema)
413
+ try:
414
+ if status == 400 and e.response is not None:
415
+ data = e.response.json()
416
+ detail = data.get("detail") if isinstance(data, dict) else None
417
+ err_code = (detail or {}).get("error") if isinstance(detail, dict) else None
418
+ if err_code == "token_budget_exceeded":
419
+ info = (detail or {}).get("details") or {}
420
+ messages_tokens = int(info.get("messages_tokens") or 0)
421
+ model_limit = int(info.get("model_limit") or 0)
422
+ safety = 64
423
+ # Compute a conservative new max_tokens
424
+ new_max = max(16, model_limit - messages_tokens - safety)
425
+ try:
426
+ # Update request and retry once immediately with smaller budget
427
+ if isinstance(processed_request, dict):
428
+ processed_request = dict(processed_request)
429
+ if "max_completion_tokens" in processed_request:
430
+ processed_request["max_completion_tokens"] = new_max
431
+ processed_request.pop("max_tokens", None)
432
+ else:
433
+ processed_request["max_tokens"] = new_max
434
+ # Remove optional fields that some servers reject
435
+ for k in ("thinking_mode", "thinking_budget", "reasoning"):
436
+ processed_request.pop(k, None)
437
+ # Force structured tool choice
438
+ if processed_request.get("tool_choice") == "required":
439
+ func_name = "run_command"
440
+ try:
441
+ tools_arr = processed_request.get("tools") or []
442
+ if isinstance(tools_arr, list) and tools_arr:
443
+ f = (
444
+ tools_arr[0].get("function")
445
+ if isinstance(tools_arr[0], dict)
446
+ else None
447
+ )
448
+ cand = (
449
+ (f or {}).get("name")
450
+ if isinstance(f, dict)
451
+ else None
452
+ )
453
+ if isinstance(cand, str) and cand:
454
+ func_name = cand
455
+ except Exception:
456
+ pass
457
+ processed_request["tool_choice"] = {
458
+ "type": "function",
459
+ "function": {"name": func_name},
460
+ }
461
+ processed_request["parallel_tool_calls"] = False
462
+ logger.warning(
463
+ {
464
+ "token_budget_recovery": True,
465
+ "messages_tokens": messages_tokens,
466
+ "model_limit": model_limit,
467
+ "retry_max_tokens": new_max,
468
+ }
469
+ )
470
+ # Retry once with reduced budget
471
+ async with httpx.AsyncClient(timeout=timeout) as client2:
472
+ r2 = await client2.post(
473
+ url, json=processed_request, headers=headers
474
+ )
475
+ r2.raise_for_status()
476
+ return r2.json()
477
+ except Exception:
478
+ pass
479
+ except Exception:
480
+ pass
481
+ raise
482
+ except Exception as e:
483
+ logger.error(f"Unexpected error calling {url}: {e}")
484
+ raise
485
+
486
+ async def check_health(
487
+ self,
488
+ base_url: str | None = None,
489
+ timeout_s: float | None = None,
490
+ ) -> dict[str, Any]:
491
+ """
492
+ Check if the inference service is healthy.
493
+
494
+ Args:
495
+ base_url: Override base URL for this request
496
+ timeout_s: Override timeout for this request
497
+
498
+ Returns:
499
+ Health status dict with 'status' field
500
+ """
501
+ url = (base_url or self.base_url).rstrip("/") + "/health"
502
+ timeout = timeout_s or 10.0
503
+
504
+ try:
505
+ async with httpx.AsyncClient(timeout=timeout) as client:
506
+ headers = self.headers.copy()
507
+ try:
508
+ parsed = httpx.URL(url)
509
+ if self._env_api_key and "/proxy/" in (parsed.path or ""):
510
+ headers.setdefault("X-API-Key", self._env_api_key)
511
+ except Exception:
512
+ pass
513
+ response = await client.get(url, headers=headers)
514
+ response.raise_for_status()
515
+ return response.json()
516
+ except httpx.HTTPStatusError as e:
517
+ if e.response.status_code == 400:
518
+ # Service is overloaded but still responding
519
+ try:
520
+ data = e.response.json()
521
+ if data.get("status") == "overloaded":
522
+ return {"status": "overloaded", "retry_after": data.get("retry_after", 1)}
523
+ except Exception:
524
+ pass
525
+ return {"status": "unhealthy", "error": str(e)}
526
+ except Exception as e:
527
+ return {"status": "unhealthy", "error": str(e)}
528
+
529
+ async def generate_with_retries(
530
+ self,
531
+ request: dict[str, Any],
532
+ base_url: str | None = None,
533
+ timeout_s: float | None = None,
534
+ max_retries: int = 4,
535
+ backoff_factor: float = 2.0,
536
+ extra_headers: dict[str, str] | None = None,
537
+ ) -> dict[str, Any]:
538
+ """
539
+ Generate with exponential backoff retries for transient errors.
540
+
541
+ Args:
542
+ request: OpenAI-compatible chat completion request
543
+ base_url: Override base URL
544
+ timeout_s: Override timeout
545
+ max_retries: Maximum number of retry attempts
546
+ backoff_factor: Exponential backoff multiplier
547
+ extra_headers: Additional headers to include (e.g., X-Policy-Name)
548
+
549
+ Returns:
550
+ OpenAI-compatible chat completion response
551
+ """
552
+ last_error = None
553
+ wait_time = 1.0
554
+
555
+ for attempt in range(max_retries + 1):
556
+ try:
557
+ # Apply parameter fixes to the request
558
+ processed_request = self._fix_model_parameters(
559
+ request,
560
+ target_url=(base_url or self.base_url).rstrip("/") + "/v1/chat/completions",
561
+ )
562
+ return await self.generate(
563
+ request=processed_request,
564
+ base_url=base_url,
565
+ timeout_s=timeout_s,
566
+ extra_headers=extra_headers,
567
+ )
568
+ except httpx.HTTPStatusError as e:
569
+ # Retry on 400 (overloaded), 429 (rate limit), 500 (internal error), 503 (service unavailable)
570
+ if e.response.status_code not in [400, 429, 500, 503]:
571
+ raise
572
+ last_error = e
573
+ if e.response.status_code == 400:
574
+ # Check if this is an overload error by looking at response content
575
+ try:
576
+ response_data = e.response.json()
577
+ if response_data.get("status") == "overloaded":
578
+ retry_after = response_data.get("retry_after", 1)
579
+ # Use the suggested retry_after time instead of exponential backoff for overload
580
+ wait_time = max(wait_time, float(retry_after))
581
+ logger.warning(
582
+ f"Inference service overloaded (400). {response_data} Retrying after {wait_time}s..."
583
+ )
584
+ else:
585
+ # This is a different type of 400 error, don't retry
586
+ try:
587
+ redacted_headers = {}
588
+ try:
589
+ redacted_headers = dict(self.headers)
590
+ if "Authorization" in redacted_headers:
591
+ redacted_headers["Authorization"] = "***REDACTED***"
592
+ except Exception:
593
+ redacted_headers = {}
594
+ logger.error(
595
+ {
596
+ "non_overload_400": True,
597
+ "target": (base_url or self.base_url),
598
+ "payload": processed_request,
599
+ "headers": redacted_headers,
600
+ "body": e.response.text if e.response is not None else None,
601
+ }
602
+ )
603
+ except Exception:
604
+ pass
605
+ raise RuntimeError(
606
+ f"Inference 400 response: {e.response.text if e.response is not None else 'Bad Request'}"
607
+ ) from e
608
+ except Exception:
609
+ # If we can't parse the response, don't retry 400 errors
610
+ with contextlib.suppress(Exception):
611
+ logger.error(
612
+ {
613
+ "non_overload_400_unparsed": True,
614
+ "target": (base_url or self.base_url),
615
+ "payload": processed_request,
616
+ }
617
+ )
618
+ raise RuntimeError(
619
+ f"Inference 400 response (unparsed): {e.response.text if e.response is not None else 'Bad Request'}"
620
+ ) from e
621
+ elif e.response.status_code == 503:
622
+ # Avoid referencing undefined response_data
623
+ try:
624
+ preview = (e.response.text or "")[:200]
625
+ except Exception:
626
+ preview = ""
627
+ logger.warning(
628
+ f"Flash returned 503; container may be cold starting. Retrying... body={preview}"
629
+ )
630
+ elif e.response.status_code == 500:
631
+ try:
632
+ preview = (e.response.text or "")[:200]
633
+ except Exception:
634
+ preview = ""
635
+ logger.warning(
636
+ f"Flash returned 500; inference service error. Retrying... body={preview}"
637
+ )
638
+ except httpx.TimeoutException as e:
639
+ last_error = e
640
+
641
+ if attempt < max_retries:
642
+ logger.warning(
643
+ f"Inference request failed (attempt {attempt + 1}/{max_retries + 1}), "
644
+ f"retrying in {wait_time}s..."
645
+ )
646
+ await asyncio.sleep(wait_time)
647
+ wait_time *= backoff_factor
648
+
649
+ raise last_error
650
+
651
+
652
+ def create_inference_client(
653
+ task_app: Any,
654
+ api_key: str | None = None,
655
+ ) -> OpenAIClient:
656
+ """
657
+ Create an inference client using TaskApp configuration.
658
+
659
+ Args:
660
+ task_app: TaskApp instance with vllm_base_url
661
+ api_key: Optional API key for authentication
662
+
663
+ Returns:
664
+ Configured OpenAIClient instance
665
+ """
666
+ # Fallback to environment if caller didn't provide an API key
667
+ if api_key is None:
668
+ try:
669
+ import os as _os # local import to avoid module-level side effects
670
+
671
+ api_key = _os.getenv("OPENAI_API_KEY") or getattr(task_app, "openai_api_key", None)
672
+ except Exception:
673
+ api_key = None
674
+
675
+ import json as _json
676
+ import os as _os
677
+ import time as _time
678
+
679
+ if _os.getenv("SYNTH_FAKE_INFERENCE", "").strip():
680
+
681
+ class _DummyClient:
682
+ async def generate_with_retries(
683
+ self,
684
+ request: dict[str, Any],
685
+ base_url: str | None = None,
686
+ max_retries: int = 0,
687
+ backoff_factor: float = 1.0,
688
+ extra_headers: dict[str, str] | None = None,
689
+ ) -> dict[str, Any]:
690
+ tool_call = {
691
+ "id": "call_dummy",
692
+ "type": "function",
693
+ "function": {
694
+ "name": "interact_many",
695
+ "arguments": _json.dumps({"actions": ["move_right"]}),
696
+ },
697
+ }
698
+ return {
699
+ "id": f"cmpl-{int(_time.time())}",
700
+ "object": "chat.completion",
701
+ "created": int(_time.time()),
702
+ "model": request.get("model") or "dummy-model",
703
+ "choices": [
704
+ {
705
+ "index": 0,
706
+ "message": {
707
+ "role": "assistant",
708
+ "content": "",
709
+ "tool_calls": [tool_call],
710
+ },
711
+ "finish_reason": "tool_calls",
712
+ }
713
+ ],
714
+ "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
715
+ }
716
+
717
+ async def check_health(
718
+ self,
719
+ base_url: str | None = None,
720
+ timeout_s: float | None = None,
721
+ ) -> dict[str, Any]:
722
+ return {"status": "ok", "dummy": True}
723
+
724
+ return _DummyClient()
725
+
726
+ return OpenAIClient(
727
+ base_url=task_app.vllm_base_url,
728
+ api_key=api_key,
729
+ )