synth-ai 0.2.13.dev2__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of synth-ai might be problematic.

Files changed (293)
  1. examples/README.md +1 -0
  2. examples/multi_step/SFT_README.md +147 -0
  3. examples/multi_step/configs/README_verilog_rl.md +77 -0
  4. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  5. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  6. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  7. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  8. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -11
  9. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  10. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  11. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  12. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  13. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  14. examples/multi_step/convert_traces_to_sft.py +84 -0
  15. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  16. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  17. examples/multi_step/readme.md +48 -0
  18. examples/multi_step/run_sft_qwen30b.sh +45 -0
  19. examples/multi_step/verilog_rl_lora.md +218 -0
  20. examples/qwen_coder/configs/coder_lora_30b.toml +3 -2
  21. examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
  22. examples/qwen_coder/configs/coder_lora_small.toml +2 -1
  23. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  24. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  25. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  26. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  27. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  28. examples/qwen_vl/QUICKSTART.md +327 -0
  29. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  30. examples/qwen_vl/README.md +154 -0
  31. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  32. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  33. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  34. examples/qwen_vl/SETUP_COMPLETE.md +275 -0
  35. examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
  36. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  37. examples/qwen_vl/__init__.py +2 -0
  38. examples/qwen_vl/collect_data_via_cli.md +423 -0
  39. examples/qwen_vl/collect_vision_traces.py +368 -0
  40. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
  41. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
  42. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
  43. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  44. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
  45. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
  46. examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
  47. examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
  48. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  49. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  50. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  51. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  52. examples/qwen_vl/run_vision_comparison.sh +62 -0
  53. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  54. examples/qwen_vl/test_image_validation.py +201 -0
  55. examples/qwen_vl/test_sft_vision_data.py +110 -0
  56. examples/rl/README.md +1 -1
  57. examples/rl/configs/eval_base_qwen.toml +17 -0
  58. examples/rl/configs/eval_rl_qwen.toml +13 -0
  59. examples/rl/configs/rl_from_base_qwen.toml +37 -0
  60. examples/rl/configs/rl_from_base_qwen17.toml +76 -0
  61. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  62. examples/rl/run_eval.py +436 -0
  63. examples/rl/run_rl_and_save.py +111 -0
  64. examples/rl/task_app/README.md +22 -0
  65. examples/rl/task_app/math_single_step.py +990 -0
  66. examples/rl/task_app/math_task_app.py +111 -0
  67. examples/sft/README.md +5 -5
  68. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
  69. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
  70. examples/sft/evaluate.py +4 -4
  71. examples/sft/export_dataset.py +7 -4
  72. examples/sft/generate_traces.py +2 -0
  73. examples/swe/task_app/README.md +1 -1
  74. examples/swe/task_app/grpo_swe_mini.py +1 -1
  75. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  76. examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
  77. examples/swe/task_app/hosted/policy_routes.py +0 -2
  78. examples/swe/task_app/hosted/rollout.py +2 -8
  79. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  80. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  81. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  82. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  83. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  84. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  85. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  86. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  87. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  88. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  89. examples/task_apps/crafter/task_app/__init__.py +3 -0
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +309 -14
  91. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +75 -4
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +55 -3
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +114 -32
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +127 -27
  97. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +156 -0
  98. examples/task_apps/enron/__init__.py +1 -0
  99. examples/task_apps/enron/filter_sft.toml +5 -0
  100. examples/task_apps/enron/tests/__init__.py +2 -0
  101. examples/task_apps/enron/tests/integration/__init__.py +2 -0
  102. examples/task_apps/enron/tests/integration/test_enron_eval.py +2 -0
  103. examples/task_apps/enron/tests/unit/__init__.py +2 -0
  104. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  105. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  106. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  107. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  108. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +2 -0
  109. examples/task_apps/pokemon_red/task_app.py +199 -6
  110. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +2 -0
  111. examples/task_apps/sokoban/filter_sft.toml +5 -0
  112. examples/task_apps/sokoban/tests/__init__.py +2 -0
  113. examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
  114. examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
  115. examples/task_apps/verilog/eval_groq_qwen32b.toml +8 -4
  116. examples/task_apps/verilog/filter_sft.toml +5 -0
  117. examples/task_apps/verilog/task_app/grpo_verilog.py +258 -23
  118. examples/task_apps/verilog/tests/__init__.py +2 -0
  119. examples/task_apps/verilog/tests/integration/__init__.py +2 -0
  120. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +2 -0
  121. examples/task_apps/verilog/tests/unit/__init__.py +2 -0
  122. examples/vlm/README.md +3 -3
  123. examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
  124. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  125. examples/vlm/filter_image_rows.py +1 -1
  126. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  127. examples/warming_up_to_rl/_utils.py +92 -0
  128. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  129. examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
  130. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
  131. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  132. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  133. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  134. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  135. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  136. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  137. examples/warming_up_to_rl/groq_test.py +2 -0
  138. examples/warming_up_to_rl/readme.md +63 -132
  139. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  140. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  141. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  142. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  143. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  144. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  145. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  146. examples/warming_up_to_rl/task_app/README.md +42 -0
  147. examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
  148. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  152. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  153. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  154. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  155. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  156. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  157. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
  158. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  159. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  160. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  161. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  162. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  163. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  164. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  165. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
  166. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  167. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  168. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  169. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  170. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  171. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  172. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  173. synth_ai/__init__.py +44 -30
  174. synth_ai/_utils/__init__.py +47 -0
  175. synth_ai/_utils/base_url.py +10 -0
  176. synth_ai/_utils/http.py +10 -0
  177. synth_ai/_utils/prompts.py +10 -0
  178. synth_ai/_utils/task_app_state.py +12 -0
  179. synth_ai/_utils/user_config.py +10 -0
  180. synth_ai/api/models/supported.py +145 -7
  181. synth_ai/api/train/__init__.py +13 -1
  182. synth_ai/api/train/cli.py +30 -7
  183. synth_ai/api/train/config_finder.py +18 -11
  184. synth_ai/api/train/env_resolver.py +13 -10
  185. synth_ai/cli/__init__.py +66 -49
  186. synth_ai/cli/_modal_wrapper.py +9 -6
  187. synth_ai/cli/_typer_patch.py +0 -2
  188. synth_ai/cli/_validate_task_app.py +22 -4
  189. synth_ai/cli/legacy_root_backup.py +3 -1
  190. synth_ai/cli/lib/__init__.py +10 -0
  191. synth_ai/cli/lib/task_app_discovery.py +7 -0
  192. synth_ai/cli/lib/task_app_env.py +518 -0
  193. synth_ai/cli/recent.py +1 -0
  194. synth_ai/cli/setup.py +266 -0
  195. synth_ai/cli/task_app_deploy.py +16 -0
  196. synth_ai/cli/task_app_list.py +25 -0
  197. synth_ai/cli/task_app_modal_serve.py +16 -0
  198. synth_ai/cli/task_app_serve.py +18 -0
  199. synth_ai/cli/task_apps.py +392 -141
  200. synth_ai/cli/train.py +18 -0
  201. synth_ai/cli/tui.py +62 -0
  202. synth_ai/demos/__init__.py +10 -0
  203. synth_ai/demos/core/__init__.py +28 -1
  204. synth_ai/demos/crafter/__init__.py +1 -0
  205. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  206. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  207. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  208. synth_ai/demos/demo_registry.py +176 -0
  209. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  210. synth_ai/demos/math/__init__.py +1 -0
  211. synth_ai/demos/math/_common.py +16 -0
  212. synth_ai/demos/math/app.py +38 -0
  213. synth_ai/demos/math/config.toml +76 -0
  214. synth_ai/demos/math/deploy_modal.py +54 -0
  215. synth_ai/demos/math/modal_task_app.py +702 -0
  216. synth_ai/demos/math/task_app_entry.py +51 -0
  217. synth_ai/environments/environment/core.py +7 -1
  218. synth_ai/environments/examples/bandit/engine.py +0 -1
  219. synth_ai/environments/examples/bandit/environment.py +0 -1
  220. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  221. synth_ai/environments/examples/verilog/engine.py +76 -10
  222. synth_ai/environments/examples/wordle/environment.py +0 -1
  223. synth_ai/evals/base.py +16 -5
  224. synth_ai/evals/client.py +1 -1
  225. synth_ai/inference/client.py +1 -1
  226. synth_ai/learning/client.py +1 -1
  227. synth_ai/learning/health.py +1 -1
  228. synth_ai/learning/jobs.py +1 -1
  229. synth_ai/learning/rl/client.py +1 -1
  230. synth_ai/learning/rl/env_keys.py +1 -1
  231. synth_ai/learning/rl/secrets.py +1 -1
  232. synth_ai/learning/sft/client.py +1 -1
  233. synth_ai/learning/sft/data.py +407 -4
  234. synth_ai/learning/validators.py +4 -1
  235. synth_ai/task/__init__.py +11 -1
  236. synth_ai/task/apps/__init__.py +5 -2
  237. synth_ai/task/config.py +259 -0
  238. synth_ai/task/contracts.py +15 -2
  239. synth_ai/task/rubrics/__init__.py +4 -2
  240. synth_ai/task/rubrics/loaders.py +27 -4
  241. synth_ai/task/rubrics/scoring.py +3 -0
  242. synth_ai/task/rubrics.py +219 -0
  243. synth_ai/task/trace_correlation_helpers.py +328 -0
  244. synth_ai/task/tracing_utils.py +14 -3
  245. synth_ai/task/validators.py +145 -2
  246. synth_ai/tracing_v3/config.py +15 -13
  247. synth_ai/tracing_v3/constants.py +21 -0
  248. synth_ai/tracing_v3/db_config.py +3 -1
  249. synth_ai/tracing_v3/decorators.py +10 -7
  250. synth_ai/tracing_v3/session_tracer.py +10 -0
  251. synth_ai/tracing_v3/turso/daemon.py +2 -2
  252. synth_ai/tracing_v3/turso/native_manager.py +108 -77
  253. synth_ai/tracing_v3/utils.py +1 -1
  254. synth_ai/tui/__init__.py +5 -0
  255. synth_ai/tui/__main__.py +13 -0
  256. synth_ai/tui/cli/__init__.py +1 -0
  257. synth_ai/tui/cli/query_experiments.py +164 -0
  258. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  259. synth_ai/tui/dashboard.py +911 -0
  260. synth_ai/utils/__init__.py +101 -0
  261. synth_ai/utils/base_url.py +94 -0
  262. synth_ai/utils/cli.py +131 -0
  263. synth_ai/utils/env.py +287 -0
  264. synth_ai/utils/http.py +169 -0
  265. synth_ai/utils/modal.py +308 -0
  266. synth_ai/utils/process.py +212 -0
  267. synth_ai/utils/prompts.py +39 -0
  268. synth_ai/utils/sqld.py +122 -0
  269. synth_ai/utils/task_app_discovery.py +882 -0
  270. synth_ai/utils/task_app_env.py +186 -0
  271. synth_ai/utils/task_app_state.py +318 -0
  272. synth_ai/utils/user_config.py +137 -0
  273. synth_ai/v0/config/__init__.py +1 -5
  274. synth_ai/v0/config/base_url.py +1 -7
  275. synth_ai/v0/tracing/config.py +1 -1
  276. synth_ai/v0/tracing/decorators.py +1 -1
  277. synth_ai/v0/tracing/upload.py +1 -1
  278. synth_ai/v0/tracing_v1/config.py +1 -1
  279. synth_ai/v0/tracing_v1/decorators.py +1 -1
  280. synth_ai/v0/tracing_v1/upload.py +1 -1
  281. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
  282. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/RECORD +286 -135
  283. synth_ai/cli/man.py +0 -106
  284. synth_ai/compound/cais.py +0 -0
  285. synth_ai/core/experiment.py +0 -13
  286. synth_ai/core/system.py +0 -15
  287. synth_ai/demo_registry.py +0 -295
  288. synth_ai/handshake.py +0 -109
  289. synth_ai/http.py +0 -26
  290. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
  291. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
  292. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
  293. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py
@@ -0,0 +1,618 @@
+ from __future__ import annotations
+
+ import asyncio
+ import contextlib
+ import logging
+ from typing import Any
+
+ import httpx
+
+ logger = logging.getLogger(__name__)
+
+
+ class OpenAIClient:
+     """Async HTTP client for OpenAI-compatible inference servers (vLLM)."""
+
+     def __init__(
+         self,
+         base_url: str,
+         api_key: str | None = None,
+         timeout_s: float = 120.0,
+     ) -> None:
+         self.base_url = base_url.rstrip("/")
+         self.api_key = api_key
+         self.timeout_s = timeout_s
+         self.headers = {}
+
+         if api_key:
+             self.headers["Authorization"] = f"Bearer {api_key}"
+
+     def _fix_model_parameters(
+         self, request: dict[str, Any], target_url: str | None = None
+     ) -> dict[str, Any]:
+         """
+         Fix parameter compatibility for newer OpenAI models.
+
+         Newer models like gpt-5-nano use 'max_completion_tokens' instead of 'max_tokens'.
+         """
+         if not request:
+             return request
+
+         # Make a copy to avoid modifying the original
+         fixed_request = request.copy()
+
+         # Determine if target is OpenAI-compatible (OpenAI, Azure OpenAI, Groq);
+         # strip fields those endpoints don't accept
+         is_openai = False
+         try:
+             if isinstance(target_url, str):
+                 low = target_url.lower()
+                 is_openai = (
+                     ("openai.com" in low)
+                     or ("azure" in low and ".openai." in low)
+                     or ("groq.com" in low)
+                     or ("/openai" in low)
+                 )
+         except Exception:
+             is_openai = False
+
+         model = fixed_request.get("model", "")
+
+         if is_openai:
+             # Remove fields OpenAI/Groq don't accept
+             for k in (
+                 "stop_after_tool_calls",
+                 "thinking_mode",
+                 "thinking_budget",
+                 "reasoning",
+                 "extra_body",
+                 "parallel_tool_calls",
+                 "function_call",
+             ):
+                 if k in fixed_request:
+                     fixed_request.pop(k, None)
+
+             # GPT-5 family specifics
+             if "gpt-5" in model or "gpt-4.1" in model:
+                 # Convert max_tokens to max_completion_tokens for newer models
+                 if "max_tokens" in fixed_request:
+                     if "max_completion_tokens" not in fixed_request:
+                         fixed_request["max_completion_tokens"] = fixed_request.pop("max_tokens")
+                         logger.info(
+                             f"Converted max_tokens to max_completion_tokens for model {model}"
+                         )
+                     else:
+                         fixed_request.pop("max_tokens")
+                         logger.info(f"Removed conflicting max_tokens parameter for model {model}")
+                 # Some OpenAI endpoints ignore/deny sampling fields for reasoning models
+                 for k in ("temperature", "top_p"):
+                     if k in fixed_request:
+                         fixed_request.pop(k, None)
+                 # If tools are present, force single tool choice to our function
+                 try:
+                     tools = fixed_request.get("tools")
+                     if isinstance(tools, list) and tools:
+                         # Choose the first provided function name from tools schema (e.g., run_command)
+                         func_name = None
+                         for t in tools:
+                             try:
+                                 cand = None
+                                 if isinstance(t, dict):
+                                     f = t.get("function")
+                                     if isinstance(f, dict):
+                                         cand = f.get("name")
+                                 if isinstance(cand, str) and cand:
+                                     func_name = cand
+                                     break
+                             except Exception:
+                                 continue
+                         if not func_name:
+                             func_name = "run_command"
+                         fixed_request["tool_choice"] = {
+                             "type": "function",
+                             "function": {"name": func_name},
+                         }
+                         fixed_request["parallel_tool_calls"] = False
+                 except Exception:
+                     pass
+
+         return fixed_request
+
+     async def generate(
+         self,
+         request: dict[str, Any],
+         base_url: str | None = None,
+         timeout_s: float | None = None,
+         extra_headers: dict[str, str] | None = None,
+     ) -> dict[str, Any]:
+         """
+         Send a chat completion request to the inference server.
+
+         Args:
+             request: OpenAI-compatible chat completion request
+             base_url: Override base URL for this request
+             timeout_s: Override timeout for this request
+             extra_headers: Additional headers to include (e.g., X-Policy-Name)
+
+         Returns:
+             OpenAI-compatible chat completion response
+         """
+         url = (base_url or self.base_url).rstrip("/") + "/v1/chat/completions"
+         timeout = timeout_s or self.timeout_s
+
+         # Merge headers
+         headers = self.headers.copy()
+         if extra_headers:
+             headers.update(extra_headers)
+
+         # Fix parameter compatibility for newer models
+         processed_request = self._fix_model_parameters(request, target_url=url)
+
+         # Log request (redact messages in production)
+         logger.info(f"Inference POST target: {url}")
+         if extra_headers:
+             logger.info(f"Extra headers: {extra_headers}")
+         with contextlib.suppress(Exception):
+             keys_preview = sorted(processed_request.keys())
+             logger.info(f"Request keys: {keys_preview}")
+
+         # Final hard-guard for OpenAI: ensure unsupported field is not present
+         try:
+             if "openai" in url.lower() and "stop_after_tool_calls" in processed_request:
+                 processed_request.pop("stop_after_tool_calls", None)
+                 logger.info("Removed stop_after_tool_calls for OpenAI request")
+             # Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
+             low_url = url.lower()
+             if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
+                 processed_request, dict
+             ):
+                 rf = processed_request.get("response_format")
+                 rf_type = None
+                 if isinstance(rf, dict):
+                     rf_type = str(rf.get("type") or "").lower()
+                 if rf_type in {"json_object", "json_schema"}:
+                     msgs = processed_request.get("messages")
+                     has_json_word = False
+                     if isinstance(msgs, list):
+                         for m in msgs:
+                             try:
+                                 content = m.get("content") if isinstance(m, dict) else None
+                                 text = None
+                                 if isinstance(content, str):
+                                     text = content
+                                 elif isinstance(content, list):
+                                     # Join any text segments
+                                     parts = []
+                                     for seg in content:
+                                         if isinstance(seg, dict) and isinstance(
+                                             seg.get("text"), str
+                                         ):
+                                             parts.append(seg["text"])
+                                     text = "\n".join(parts)
+                                 if isinstance(text, str) and ("json" in text.lower()):
+                                     has_json_word = True
+                                     break
+                             except Exception:
+                                 continue
+                     if not has_json_word:
+                         try:
+                             instruction = (
+                                 "Respond in strict JSON only. Output a single valid JSON object."
+                             )
+                             if not isinstance(msgs, list):
+                                 msgs = []
+                             # Prepend a system message to satisfy Groq requirement without changing user intent
+                             prepend = {"role": "system", "content": instruction}
+                             processed_request["messages"] = [prepend] + list(msgs)
+                             logger.info(
+                                 "Injected JSON-mode system instruction for Groq response_format compliance"
+                             )
+                         except Exception:
+                             pass
+         except Exception:
+             pass
+
+         async with httpx.AsyncClient(timeout=timeout) as client:
+             try:
+                 response = await client.post(
+                     url,
+                     json=processed_request,
+                     headers=headers,
+                 )
+                 response.raise_for_status()
+
+                 # Rich response diagnostics
+                 content_type = response.headers.get("content-type")
+                 body_text = response.text
+                 logger.info(
+                     f"Inference response status=200, content-type={content_type}, bytes={len(body_text)}"
+                 )
+                 if body_text:
+                     preview_len = min(800, len(body_text))
+                     logger.info(
+                         f"Inference response preview ({preview_len} bytes): {body_text[:preview_len]}"
+                     )
+
+                 result = response.json()
+                 logger.info(f"Inference response parsed_type={type(result).__name__}")
+                 return result
+
+             except httpx.TimeoutException:
+                 logger.error(f"Request to {url} timed out after {timeout}s")
+                 raise
+             except httpx.HTTPStatusError as e:
+                 status = e.response.status_code if e.response is not None else None
+                 text = e.response.text if e.response is not None else str(e)
+                 # Log full body for debugging remote failures
+                 try:
+                     logger.error(
+                         {
+                             "openai_http_error": True,
+                             "status": status,
+                             "url": url,
+                             "body": text,
+                         }
+                     )
+                 except Exception:
+                     logger.error(f"HTTP error from {url}: {status} - {text}")
+                 # For 4xx/5xx, print full sanitized request to aid debugging (especially Groq 400s)
+                 try:
+                     redacted_headers = dict(headers)
+                     if "Authorization" in redacted_headers:
+                         redacted_headers["Authorization"] = "***REDACTED***"
+                     logger.error(
+                         {
+                             "request_debug": True,
+                             "status": status,
+                             "target": url,
+                             "headers": redacted_headers,
+                             "payload": processed_request,
+                         }
+                     )
+                 except Exception:
+                     pass
+                 # Special case: token budget exceeded (OpenAI-compatible error schema)
+                 try:
+                     if status == 400 and e.response is not None:
+                         data = e.response.json()
+                         detail = data.get("detail") if isinstance(data, dict) else None
+                         err_code = (detail or {}).get("error") if isinstance(detail, dict) else None
+                         if err_code == "token_budget_exceeded":
+                             info = (detail or {}).get("details") or {}
+                             messages_tokens = int(info.get("messages_tokens") or 0)
+                             model_limit = int(info.get("model_limit") or 0)
+                             safety = 64
+                             # Compute a conservative new max_tokens
+                             new_max = max(16, model_limit - messages_tokens - safety)
+                             try:
+                                 # Update request and retry once immediately with smaller budget
+                                 if isinstance(processed_request, dict):
+                                     processed_request = dict(processed_request)
+                                     if "max_completion_tokens" in processed_request:
+                                         processed_request["max_completion_tokens"] = new_max
+                                         processed_request.pop("max_tokens", None)
+                                     else:
+                                         processed_request["max_tokens"] = new_max
+                                     # Remove optional fields that some servers reject
+                                     for k in ("thinking_mode", "thinking_budget", "reasoning"):
+                                         processed_request.pop(k, None)
+                                     # Force structured tool choice
+                                     if processed_request.get("tool_choice") == "required":
+                                         func_name = "run_command"
+                                         try:
+                                             tools_arr = processed_request.get("tools") or []
+                                             if isinstance(tools_arr, list) and tools_arr:
+                                                 f = (
+                                                     tools_arr[0].get("function")
+                                                     if isinstance(tools_arr[0], dict)
+                                                     else None
+                                                 )
+                                                 cand = (
+                                                     (f or {}).get("name")
+                                                     if isinstance(f, dict)
+                                                     else None
+                                                 )
+                                                 if isinstance(cand, str) and cand:
+                                                     func_name = cand
+                                         except Exception:
+                                             pass
+                                         processed_request["tool_choice"] = {
+                                             "type": "function",
+                                             "function": {"name": func_name},
+                                         }
+                                         processed_request["parallel_tool_calls"] = False
+                                 logger.warning(
+                                     {
+                                         "token_budget_recovery": True,
+                                         "messages_tokens": messages_tokens,
+                                         "model_limit": model_limit,
+                                         "retry_max_tokens": new_max,
+                                     }
+                                 )
+                                 # Retry once with reduced budget
+                                 async with httpx.AsyncClient(timeout=timeout) as client2:
+                                     r2 = await client2.post(
+                                         url, json=processed_request, headers=headers
+                                     )
+                                     r2.raise_for_status()
+                                     return r2.json()
+                             except Exception:
+                                 pass
+                 except Exception:
+                     pass
+                 # Gracefully degrade on 422 so rollouts can still produce a trajectory
+                 if status == 422:
+                     try:
+                         # Best-effort parse of error for diagnostics
+                         err = None
+                         try:
+                             err = e.response.json()
+                         except Exception:
+                             err = {"error": "unprocessable", "detail": (text or "")[:200]}
+                         logger.warning(
+                             {
+                                 "inference_422_recovered": True,
+                                 "detail": err,
+                             }
+                         )
+                     except Exception:
+                         pass
+                     # Return a minimal OpenAI-compatible response with no tool_calls/content
+                     import time as _t
+
+                     return {
+                         "id": f"cmpl-{int(_t.time())}",
+                         "object": "chat.completion",
+                         "created": int(_t.time()),
+                         "model": processed_request.get("model") or "unknown",
+                         "choices": [
+                             {
+                                 "index": 0,
+                                 "message": {"role": "assistant", "content": "", "tool_calls": []},
+                                 "finish_reason": "stop",
+                             }
+                         ],
+                         "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
+                     }
+                 raise
+             except Exception as e:
+                 logger.error(f"Unexpected error calling {url}: {e}")
+                 raise
+
+     async def check_health(
+         self,
+         base_url: str | None = None,
+         timeout_s: float | None = None,
+     ) -> dict[str, Any]:
+         """
+         Check if the inference service is healthy.
+
+         Args:
+             base_url: Override base URL for this request
+             timeout_s: Override timeout for this request
+
+         Returns:
+             Health status dict with 'status' field
+         """
+         url = (base_url or self.base_url).rstrip("/") + "/health"
+         timeout = timeout_s or 10.0
+
+         try:
+             async with httpx.AsyncClient(timeout=timeout) as client:
+                 response = await client.get(url, headers=self.headers)
+                 response.raise_for_status()
+                 return response.json()
+         except httpx.HTTPStatusError as e:
+             if e.response.status_code == 400:
+                 # Service is overloaded but still responding
+                 try:
+                     data = e.response.json()
+                     if data.get("status") == "overloaded":
+                         return {"status": "overloaded", "retry_after": data.get("retry_after", 1)}
+                 except Exception:
+                     pass
+             return {"status": "unhealthy", "error": str(e)}
+         except Exception as e:
+             return {"status": "unhealthy", "error": str(e)}
+
+     async def generate_with_retries(
+         self,
+         request: dict[str, Any],
+         base_url: str | None = None,
+         timeout_s: float | None = None,
+         max_retries: int = 4,
+         backoff_factor: float = 2.0,
+         extra_headers: dict[str, str] | None = None,
+     ) -> dict[str, Any]:
+         """
+         Generate with exponential backoff retries for transient errors.
+
+         Args:
+             request: OpenAI-compatible chat completion request
+             base_url: Override base URL
+             timeout_s: Override timeout
+             max_retries: Maximum number of retry attempts
+             backoff_factor: Exponential backoff multiplier
+             extra_headers: Additional headers to include (e.g., X-Policy-Name)
+
+         Returns:
+             OpenAI-compatible chat completion response
+         """
+         last_error = None
+         wait_time = 1.0
+
+         for attempt in range(max_retries + 1):
+             try:
+                 # Apply parameter fixes to the request
+                 processed_request = self._fix_model_parameters(
+                     request,
+                     target_url=(base_url or self.base_url).rstrip("/") + "/v1/chat/completions",
+                 )
+                 return await self.generate(
+                     request=processed_request,
+                     base_url=base_url,
+                     timeout_s=timeout_s,
+                     extra_headers=extra_headers,
+                 )
+             except httpx.HTTPStatusError as e:
+                 # Retry on 400 (overloaded), 429 (rate limit), 500 (internal error), 503 (service unavailable)
+                 if e.response.status_code not in [400, 429, 500, 503]:
+                     raise
+                 last_error = e
+                 if e.response.status_code == 400:
+                     # Check if this is an overload error by looking at response content
+                     try:
+                         response_data = e.response.json()
+                         if response_data.get("status") == "overloaded":
+                             retry_after = response_data.get("retry_after", 1)
+                             # Use the suggested retry_after time instead of exponential backoff for overload
+                             wait_time = max(wait_time, float(retry_after))
+                             logger.warning(
+                                 f"Inference service overloaded (400). {response_data} Retrying after {wait_time}s..."
+                             )
+                         else:
+                             # This is a different type of 400 error, don't retry
+                             try:
+                                 redacted_headers = {}
+                                 try:
+                                     redacted_headers = dict(self.headers)
+                                     if "Authorization" in redacted_headers:
+                                         redacted_headers["Authorization"] = "***REDACTED***"
+                                 except Exception:
+                                     redacted_headers = {}
+                                 logger.error(
+                                     {
+                                         "non_overload_400": True,
+                                         "target": (base_url or self.base_url),
+                                         "payload": processed_request,
+                                         "headers": redacted_headers,
+                                         "body": e.response.text if e.response is not None else None,
+                                     }
+                                 )
+                             except Exception:
+                                 pass
+                             raise RuntimeError(
+                                 f"Inference 400 response: {e.response.text if e.response is not None else 'Bad Request'}"
+                             ) from e
+                     except Exception:
+                         # If we can't parse the response, don't retry 400 errors
+                         with contextlib.suppress(Exception):
+                             logger.error(
+                                 {
+                                     "non_overload_400_unparsed": True,
+                                     "target": (base_url or self.base_url),
+                                     "payload": processed_request,
+                                 }
+                             )
+                         raise RuntimeError(
+                             f"Inference 400 response (unparsed): {e.response.text if e.response is not None else 'Bad Request'}"
+                         ) from e
+                 elif e.response.status_code == 503:
+                     # Avoid referencing undefined response_data
+                     try:
+                         preview = (e.response.text or "")[:200]
+                     except Exception:
+                         preview = ""
+                     logger.warning(
+                         f"Flash returned 503; container may be cold starting. Retrying... body={preview}"
+                     )
+                 elif e.response.status_code == 500:
+                     try:
+                         preview = (e.response.text or "")[:200]
+                     except Exception:
+                         preview = ""
+                     logger.warning(
+                         f"Flash returned 500; inference service error. Retrying... body={preview}"
+                     )
+             except httpx.TimeoutException as e:
+                 last_error = e
+
+             if attempt < max_retries:
+                 logger.warning(
+                     f"Inference request failed (attempt {attempt + 1}/{max_retries + 1}), "
+                     f"retrying in {wait_time}s..."
+                 )
+                 await asyncio.sleep(wait_time)
+                 wait_time *= backoff_factor
+
+         raise last_error
+
+
+ def create_inference_client(
+     task_app: Any,
+     api_key: str | None = None,
+ ) -> OpenAIClient:
+     """
+     Create an inference client using TaskApp configuration.
+
+     Args:
+         task_app: TaskApp instance with vllm_base_url
+         api_key: Optional API key for authentication
+
+     Returns:
+         Configured OpenAIClient instance
+     """
+     # Fallback to environment if caller didn't provide an API key
+     if api_key is None:
+         try:
+             import os as _os  # local import to avoid module-level side effects
+
+             api_key = _os.getenv("OPENAI_API_KEY") or getattr(task_app, "openai_api_key", None)
+         except Exception:
+             api_key = None
+
+     import json as _json
+     import os as _os
+     import time as _time
+
+     if _os.getenv("SYNTH_FAKE_INFERENCE", "").strip():
+
+         class _DummyClient:
+             async def generate_with_retries(
+                 self,
+                 request: dict[str, Any],
+                 base_url: str | None = None,
+                 max_retries: int = 0,
+                 backoff_factor: float = 1.0,
+                 extra_headers: dict[str, str] | None = None,
+             ) -> dict[str, Any]:
+                 tool_call = {
+                     "id": "call_dummy",
+                     "type": "function",
+                     "function": {
+                         "name": "interact_many",
+                         "arguments": _json.dumps({"actions": ["move_right"]}),
+                     },
+                 }
+                 return {
+                     "id": f"cmpl-{int(_time.time())}",
+                     "object": "chat.completion",
+                     "created": int(_time.time()),
+                     "model": request.get("model") or "dummy-model",
+                     "choices": [
+                         {
+                             "index": 0,
+                             "message": {
+                                 "role": "assistant",
+                                 "content": "",
+                                 "tool_calls": [tool_call],
+                             },
+                             "finish_reason": "tool_calls",
+                         }
+                     ],
+                     "usage": {"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
+                 }
+
+             async def check_health(
+                 self,
+                 base_url: str | None = None,
+                 timeout_s: float | None = None,
+             ) -> dict[str, Any]:
+                 return {"status": "ok", "dummy": True}
+
+         return _DummyClient()
+
+     return OpenAIClient(
+         base_url=task_app.vllm_base_url,
+         api_key=api_key,
+     )
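
For orientation, here is a minimal usage sketch of the new client. It is not taken from the package docs: it assumes the module above is importable as openai_client, that an OpenAI-compatible server (e.g., a local vLLM instance) is listening on localhost:8000, and the model name is a placeholder.

import asyncio

from openai_client import OpenAIClient  # hypothetical import path for the file above


async def main() -> None:
    # Point the client at an assumed local OpenAI-compatible endpoint
    client = OpenAIClient(base_url="http://localhost:8000", timeout_s=60.0)

    request = {
        "model": "Qwen/Qwen3-4B",  # placeholder; any model served by the endpoint
        "messages": [{"role": "user", "content": "Reply with one word: hello"}],
        "max_tokens": 16,
    }

    # Probe /health first; generate_with_retries then retries on 400 (overload),
    # 429, 500, and 503 with exponential backoff and re-raises other errors.
    print(await client.check_health())
    response = await client.generate_with_retries(request, max_retries=2)
    print(response["choices"][0]["message"]["content"])


asyncio.run(main())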