synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (299)
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/baseline/banking77_baseline.py +204 -0
  3. examples/baseline/crafter_baseline.py +407 -0
  4. examples/baseline/pokemon_red_baseline.py +326 -0
  5. examples/baseline/simple_baseline.py +56 -0
  6. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  7. examples/blog_posts/gepa/README.md +355 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  9. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  10. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  13. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  15. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  16. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  18. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  19. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  20. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  21. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  22. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  23. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  24. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  25. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  26. examples/blog_posts/gepa/task_apps.py +105 -0
  27. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  28. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  29. examples/blog_posts/pokemon_vl/README.md +98 -0
  30. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  31. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  32. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  33. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  34. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  35. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  36. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  37. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  38. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  39. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  40. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  41. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  42. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  43. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  44. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  45. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  46. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  47. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  48. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  49. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  50. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  51. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  52. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  53. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  54. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  55. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  56. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  57. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  58. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  59. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  60. examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
  61. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  62. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
  63. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
  64. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  65. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  66. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  67. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  68. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  69. examples/qwen_vl/README.md +10 -12
  70. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  71. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  72. examples/qwen_vl/collect_data_via_cli.md +76 -84
  73. examples/qwen_vl/collect_vision_traces.py +4 -4
  74. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  75. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  76. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  77. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  78. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  79. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  80. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  81. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  82. examples/qwen_vl/run_vision_comparison.sh +6 -7
  83. examples/rl/README.md +5 -5
  84. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  85. examples/rl/configs/rl_from_base_qwen17.toml +6 -2
  86. examples/rl/task_app/README.md +1 -2
  87. examples/rl/task_app/math_single_step.py +2 -2
  88. examples/run_crafter_demo.sh +2 -2
  89. examples/sft/README.md +1 -1
  90. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  91. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  92. examples/swe/task_app/README.md +32 -2
  93. examples/swe/task_app/grpo_swe_mini.py +4 -0
  94. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  95. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  96. examples/swe/task_app/hosted/inference/openai_client.py +4 -38
  97. examples/swe/task_app/hosted/policy_routes.py +17 -0
  98. examples/swe/task_app/hosted/rollout.py +4 -2
  99. examples/swe/task_app/morph_backend.py +178 -0
  100. examples/task_apps/banking77/__init__.py +6 -0
  101. examples/task_apps/banking77/banking77_task_app.py +841 -0
  102. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  103. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  104. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  105. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  106. examples/task_apps/crafter/task_app/README.md +1 -1
  107. examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
  108. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  109. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  110. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  111. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  112. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
  113. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
  114. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
  115. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  116. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  117. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  118. examples/task_apps/gepa_benchmarks/common.py +260 -0
  119. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  120. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  121. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  122. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  123. examples/task_apps/math/README.md +1 -2
  124. examples/task_apps/pokemon_red/README.md +3 -4
  125. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  126. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  127. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  128. examples/task_apps/pokemon_red/task_app.py +288 -39
  129. examples/task_apps/sokoban/README.md +2 -3
  130. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  131. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  132. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  133. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  134. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  135. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
  136. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  137. examples/warming_up_to_rl/task_app/README.md +1 -1
  138. examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
  139. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  147. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  148. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
  149. synth_ai/api/train/builders.py +99 -4
  150. synth_ai/api/train/cli.py +516 -26
  151. synth_ai/api/train/config_finder.py +13 -2
  152. synth_ai/api/train/configs/__init__.py +23 -2
  153. synth_ai/api/train/configs/prompt_learning.py +442 -0
  154. synth_ai/api/train/configs/rl.py +61 -7
  155. synth_ai/api/train/configs/sft.py +6 -2
  156. synth_ai/api/train/configs/shared.py +59 -2
  157. synth_ai/api/train/task_app.py +1 -1
  158. synth_ai/api/train/validators.py +277 -0
  159. synth_ai/auth/credentials.py +119 -0
  160. synth_ai/baseline/__init__.py +25 -0
  161. synth_ai/baseline/config.py +209 -0
  162. synth_ai/baseline/discovery.py +214 -0
  163. synth_ai/baseline/execution.py +146 -0
  164. synth_ai/cli/__init__.py +94 -18
  165. synth_ai/cli/__main__.py +0 -0
  166. synth_ai/cli/claude.py +70 -0
  167. synth_ai/cli/codex.py +84 -0
  168. synth_ai/cli/commands/__init__.py +18 -0
  169. synth_ai/cli/commands/baseline/__init__.py +12 -0
  170. synth_ai/cli/commands/baseline/core.py +637 -0
  171. synth_ai/cli/commands/baseline/list.py +93 -0
  172. synth_ai/cli/commands/demo/__init__.py +6 -0
  173. synth_ai/cli/commands/demo/core.py +163 -0
  174. synth_ai/cli/commands/eval/__init__.py +19 -0
  175. synth_ai/cli/commands/eval/core.py +1112 -0
  176. synth_ai/cli/commands/eval/errors.py +81 -0
  177. synth_ai/cli/commands/eval/validation.py +133 -0
  178. synth_ai/cli/commands/filter/__init__.py +12 -0
  179. synth_ai/cli/commands/filter/core.py +424 -0
  180. synth_ai/cli/commands/filter/errors.py +55 -0
  181. synth_ai/cli/commands/filter/validation.py +77 -0
  182. synth_ai/cli/commands/help/__init__.py +177 -0
  183. synth_ai/cli/commands/help/core.py +72 -0
  184. synth_ai/cli/commands/smoke/__init__.py +7 -0
  185. synth_ai/cli/commands/smoke/core.py +1436 -0
  186. synth_ai/cli/commands/status/__init__.py +64 -0
  187. synth_ai/cli/commands/status/client.py +192 -0
  188. synth_ai/cli/commands/status/config.py +92 -0
  189. synth_ai/cli/commands/status/errors.py +20 -0
  190. synth_ai/cli/commands/status/formatters.py +164 -0
  191. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  192. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  193. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  194. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  195. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  196. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  197. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  198. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  199. synth_ai/cli/commands/status/utils.py +114 -0
  200. synth_ai/cli/commands/train/__init__.py +53 -0
  201. synth_ai/cli/commands/train/core.py +21 -0
  202. synth_ai/cli/commands/train/errors.py +117 -0
  203. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  204. synth_ai/cli/commands/train/judge_validation.py +305 -0
  205. synth_ai/cli/commands/train/validation.py +386 -0
  206. synth_ai/cli/demo.py +30 -158
  207. synth_ai/cli/deploy/__init__.py +43 -0
  208. synth_ai/cli/deploy.py +162 -0
  209. synth_ai/cli/eval/__init__.py +36 -0
  210. synth_ai/cli/eval/core.py +5 -0
  211. synth_ai/cli/eval/errors.py +31 -0
  212. synth_ai/cli/eval/validation.py +5 -0
  213. synth_ai/cli/filter/__init__.py +28 -0
  214. synth_ai/cli/filter/core.py +5 -0
  215. synth_ai/cli/filter/errors.py +23 -0
  216. synth_ai/cli/filter/validation.py +5 -0
  217. synth_ai/cli/legacy_root_backup.py +14 -8
  218. synth_ai/cli/modal_serve/__init__.py +12 -0
  219. synth_ai/cli/modal_serve/core.py +14 -0
  220. synth_ai/cli/modal_serve/errors.py +8 -0
  221. synth_ai/cli/modal_serve/validation.py +11 -0
  222. synth_ai/cli/opencode.py +107 -0
  223. synth_ai/cli/root.py +9 -5
  224. synth_ai/cli/serve/__init__.py +12 -0
  225. synth_ai/cli/serve/core.py +14 -0
  226. synth_ai/cli/serve/errors.py +8 -0
  227. synth_ai/cli/serve/validation.py +11 -0
  228. synth_ai/cli/setup.py +20 -265
  229. synth_ai/cli/status.py +7 -126
  230. synth_ai/cli/task_app_deploy.py +1 -10
  231. synth_ai/cli/task_app_modal_serve.py +4 -9
  232. synth_ai/cli/task_app_serve.py +4 -11
  233. synth_ai/cli/task_apps.py +51 -1480
  234. synth_ai/cli/train/__init__.py +12 -0
  235. synth_ai/cli/train/core.py +21 -0
  236. synth_ai/cli/train/errors.py +8 -0
  237. synth_ai/cli/train/validation.py +24 -0
  238. synth_ai/cli/train.py +1 -14
  239. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  240. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  241. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  242. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  243. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  244. synth_ai/environments/examples/red/engine.py +33 -12
  245. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  246. synth_ai/environments/examples/red/environment.py +26 -0
  247. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  248. synth_ai/http.py +12 -0
  249. synth_ai/judge_schemas.py +10 -10
  250. synth_ai/learning/__init__.py +10 -0
  251. synth_ai/learning/prompt_learning_client.py +276 -0
  252. synth_ai/learning/prompt_learning_types.py +184 -0
  253. synth_ai/learning/rl/client.py +3 -1
  254. synth_ai/pricing/__init__.py +2 -0
  255. synth_ai/pricing/model_pricing.py +57 -0
  256. synth_ai/streaming/__init__.py +29 -0
  257. synth_ai/streaming/config.py +94 -0
  258. synth_ai/streaming/handlers.py +518 -0
  259. synth_ai/streaming/streamer.py +320 -0
  260. synth_ai/streaming/types.py +95 -0
  261. synth_ai/task/apps/__init__.py +1 -0
  262. synth_ai/task/config.py +2 -0
  263. synth_ai/task/tracing_utils.py +25 -25
  264. synth_ai/task/validators.py +45 -9
  265. synth_ai/task_app_cfgs.py +21 -0
  266. synth_ai/tracing_v3/config.py +162 -19
  267. synth_ai/tracing_v3/constants.py +1 -1
  268. synth_ai/tracing_v3/db_config.py +24 -38
  269. synth_ai/tracing_v3/migration_helper.py +1 -2
  270. synth_ai/tracing_v3/storage/config.py +47 -13
  271. synth_ai/tracing_v3/storage/factory.py +3 -3
  272. synth_ai/tracing_v3/turso/daemon.py +113 -11
  273. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  274. synth_ai/types.py +8 -0
  275. synth_ai/urls.py +11 -0
  276. synth_ai/utils/__init__.py +30 -1
  277. synth_ai/utils/agents.py +74 -0
  278. synth_ai/utils/bin.py +39 -0
  279. synth_ai/utils/cli.py +149 -5
  280. synth_ai/utils/env.py +40 -33
  281. synth_ai/utils/http.py +4 -1
  282. synth_ai/utils/json.py +72 -0
  283. synth_ai/utils/modal.py +285 -3
  284. synth_ai/utils/paths.py +48 -0
  285. synth_ai/utils/uvicorn.py +113 -0
  286. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
  287. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
  288. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  289. synth_ai/cli/tui.py +0 -62
  290. synth_ai/tui/__init__.py +0 -5
  291. synth_ai/tui/__main__.py +0 -13
  292. synth_ai/tui/cli/__init__.py +0 -1
  293. synth_ai/tui/cli/query_experiments.py +0 -164
  294. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  295. synth_ai/tui/dashboard.py +0 -911
  296. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  297. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  298. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  299. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
@@ -7,7 +7,9 @@ import logging
7
7
  import os
8
8
  import time
9
9
  from typing import Any
10
+ from urllib.parse import urlparse, urlunparse
10
11
 
12
+ import click
11
13
  import httpx
12
14
 
13
15
  logger = logging.getLogger(__name__)
@@ -50,20 +52,19 @@ class OpenAIClient:
50
52
  # Make a copy to avoid modifying the original
51
53
  fixed_request = request.copy()
52
54
 
53
- # Determine if target is OpenAI-compatible (OpenAI, Azure OpenAI, Groq);
54
- # strip fields those endpoints don't accept
55
+ # Determine if target is OpenAI-compatible (OpenAI, Azure OpenAI).
56
+ # Groq shares the API surface but we keep tool enforcement fields intact.
55
57
  is_openai = False
58
+ is_groq = False
56
59
  try:
57
60
  if isinstance(target_url, str):
58
61
  low = target_url.lower()
59
- is_openai = (
60
- ("openai.com" in low)
61
- or ("azure" in low and ".openai." in low)
62
- or ("groq.com" in low)
63
- or ("/openai" in low)
64
- or ("/proxy/groq" in low)
65
- or ("/proxy/openai" in low)
66
- )
62
+ if "groq.com" in low or "/proxy/groq" in low:
63
+ is_groq = True
64
+ elif ("openai.com" in low) or ("azure" in low and ".openai." in low) or (
65
+ "/proxy/openai" in low
66
+ ):
67
+ is_openai = True
67
68
  except Exception:
68
69
  is_openai = False
69
70
 
@@ -149,11 +150,169 @@ class OpenAIClient:
149
150
  OpenAI-compatible chat completion response
150
151
  """
151
152
  base = (base_url or self.base_url).rstrip("/")
152
- # Don't append /v1/chat/completions if the URL already contains it
153
- if "/v1/chat/completions" in base:
153
+ # Ensure processed_request is defined for error logging paths
154
+ processed_request: dict[str, Any] = dict(request or {})
155
+
156
+ # Bulletproof normalization BEFORE any parsing
157
+ def _local_force_normalize(u: str) -> str:
158
+ if not isinstance(u, str) or not u:
159
+ return u
160
+ p = urlparse(u)
161
+ path = (p.path or "").rstrip("/")
162
+ q = p.query or ""
163
+ # If query contains a path segment, extract and repair
164
+ if q and "/" in q:
165
+ before, after = q.split("/", 1)
166
+ # Split off any extra query parameters that were appended after the path
167
+ cut_positions = [i for i in [after.find("&"), after.find("?")] if i >= 0]
168
+ cut = min(cut_positions) if cut_positions else len(after)
169
+ path_from_query = "/" + after[:cut]
170
+ extra_query = after[cut + 1 :] if cut < len(after) else ""
171
+ merged_query = before
172
+ if extra_query:
173
+ merged_query = f"{merged_query}&{extra_query}" if merged_query else extra_query
174
+ # Ensure final path
175
+ final_path = path_from_query if path_from_query.startswith("/v1/chat/completions") else f"{path_from_query.rstrip('/')}/v1/chat/completions"
176
+ p = p._replace(path=final_path, query=merged_query)
177
+ u = urlunparse(p)
178
+ p = urlparse(u)
179
+ path = p.path or ""
180
+ q = p.query or ""
181
+ if not path.endswith("/v1/chat/completions"):
182
+ new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
183
+ p = p._replace(path=new_path)
184
+ u = urlunparse(p)
185
+ p = urlparse(u)
186
+ q = p.query or ""
187
+ if q and "/" in q:
188
+ # Last-resort: drop anything after first '/'
189
+ safe_q = q.split("/")[0]
190
+ p = p._replace(query=safe_q)
191
+ u = urlunparse(p)
192
+ return u
193
+
194
+ norm_base = None
195
+ try:
196
+ # Try importing shared normalizer first
197
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.utils import (
198
+ force_normalize_chat_completions_url,
199
+ )
200
+ norm_base = force_normalize_chat_completions_url(base)
201
+ except Exception:
202
+ norm_base = _local_force_normalize(base)
203
+ base = norm_base or base
204
+ # Parse URL to handle query parameters correctly
205
+ parsed = urlparse(base)
206
+ path = parsed.path.rstrip("/")
207
+ query = parsed.query
208
+
209
+ # Debug: Log URL parsing
210
+ logger.error(f"[URL_PARSE] base={base} parsed.path={parsed.path} parsed.query={parsed.query}")
211
+
212
+ # CRITICAL FIX: Handle malformed URLs where path is incorrectly in the query string
213
+ # Example: https://host?cid=trace_123/v1/chat/completions
214
+ # Should be: https://host/v1/chat/completions?cid=trace_123
215
+
216
+ # ALWAYS check for malformed URLs - this is CRITICAL
217
+ # CRASH IMMEDIATELY if URL is malformed - don't let it through!
218
+ if query and "/" in query:
219
+ logger.error(f"[URL_FATAL] MALFORMED URL DETECTED AT START: base={base} query={query}")
220
+ # Try to fix it
221
+ logger.error(f"[URL_FIX_TRIGGERED] Query contains '/': query={query}")
222
+ # This is a malformed URL - extract path from query and fix it
223
+ logger.error(
224
+ f"[URL_FIX] Malformed URL detected: {base}\n"
225
+ f"Query contains path segments. Fixing..."
226
+ )
227
+
228
+ # Find where the path starts in the query string
229
+ # The query format is: "cid=value/path" or similar
230
+ # We need to find the first "/" that starts a path segment
231
+ query_parts = query.split("/", 1)
232
+ if len(query_parts) == 2:
233
+ # query_parts[0] is the actual query (e.g., "cid=trace_123")
234
+ # query_parts[1] is the path that was incorrectly put in query
235
+ actual_query = query_parts[0]
236
+ path_and_more = query_parts[1] # Could be "v1/chat/completions" or "v1/chat/completions&foo=bar"
237
+
238
+ # Extract the path part (everything before "&" or "?" if present)
239
+ # Handle both "&" (query param separator) and "?" (another malformed query separator)
240
+ if "&" in path_and_more:
241
+ # Path is followed by more query params (separated by &)
242
+ path_segment, extra_query = path_and_more.split("&", 1)
243
+ path_in_query = "/" + path_segment # Restore leading slash
244
+ # Merge extra query params with actual_query
245
+ actual_query = f"{actual_query}&{extra_query}"
246
+ elif "?" in path_and_more:
247
+ # Path is followed by more query params (separated by ?, which is malformed)
248
+ path_segment, extra_query = path_and_more.split("?", 1)
249
+ path_in_query = "/" + path_segment # Restore leading slash
250
+ # Merge extra query params with actual_query (use & as separator)
251
+ actual_query = f"{actual_query}&{extra_query}"
252
+ else:
253
+ # No extra query params, just the path
254
+ path_in_query = "/" + path_and_more # Restore leading slash
255
+
256
+ # If the path_in_query already contains /v1/chat/completions, use it
257
+ # Otherwise, append /v1/chat/completions
258
+ if path_in_query.startswith("/v1/chat/completions"):
259
+ final_path = path_in_query
260
+ else:
261
+ # Append /v1/chat/completions to whatever path we found
262
+ final_path = path_in_query.rstrip("/") + "/v1/chat/completions"
263
+
264
+ # Reconstruct URL correctly: path comes before query
265
+ parsed = parsed._replace(path=final_path, query=actual_query)
266
+ url = urlunparse(parsed)
267
+ logger.warning(f"[URL_FIX] Fixed malformed URL:\n FROM: {base}\n TO: {url}")
268
+ else:
269
+ # Can't parse, fall through to normal processing
270
+ logger.error(f"[URL_FIX] Could not parse malformed query: {query}")
271
+ path = parsed.path.rstrip("/")
272
+ if not path.endswith("/v1/chat/completions"):
273
+ new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
274
+ parsed = parsed._replace(path=new_path)
275
+ url = urlunparse(parsed)
276
+ else:
277
+ url = base
278
+ # Normal case: query params are separate from path
279
+ elif path.endswith("/v1/chat/completions"):
154
280
  url = base
155
281
  else:
156
- url = base + "/v1/chat/completions"
282
+ # Append /v1/chat/completions to the path, preserving query params
283
+ new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
284
+ parsed = parsed._replace(path=new_path)
285
+ url = urlunparse(parsed)
286
+ logger.debug(f"[URL_CONSTRUCT] Added path to URL: {base} -> {url}")
287
+
288
+ # FINAL VALIDATION: Ensure the constructed URL is correct
289
+ final_parsed = urlparse(url)
290
+ final_path = final_parsed.path or ""
291
+ final_query = final_parsed.query or ""
292
+
293
+ # Verify path is correct
294
+ if not final_path.endswith("/v1/chat/completions"):
295
+ error_msg = (
296
+ f"FATAL [OpenAIClient]: URL missing /v1/chat/completions path!\n"
297
+ f"Original: {base}\n"
298
+ f"Constructed: {url}\n"
299
+ f"Path: {final_path}\n"
300
+ )
301
+ logger.error(error_msg)
302
+ raise ValueError(error_msg)
303
+
304
+ # Verify query doesn't contain path segments
305
+ if final_query and "/" in final_query:
306
+ error_msg = (
307
+ f"FATAL [OpenAIClient]: Query still contains path segments after fix!\n"
308
+ f"Original: {base}\n"
309
+ f"Constructed: {url}\n"
310
+ f"Query: {final_query}\n"
311
+ f"This indicates a bug in URL construction logic."
312
+ )
313
+ logger.error(error_msg)
314
+ raise ValueError(error_msg)
315
+
157
316
  timeout = timeout_s or self.timeout_s
158
317
 
159
318
  # Merge headers
@@ -234,38 +393,104 @@ class OpenAIClient:
234
393
  logger.debug(f"🔊 [OPENAI_CLIENT_POST_FIX] Message[1] content value: {msg1_content_post if not isinstance(msg1_content_post, list) else f'list[{len(msg1_content_post)}]'}")
235
394
 
236
395
  # Log request (redact messages in production)
396
+ # CRITICAL: Verify URL is correct BEFORE making HTTP request
397
+ final_parsed_check = urlparse(url)
398
+ logger.error(f"[URL_FINAL_CHECK] Before HTTP request: url={url} path={final_parsed_check.path} query={final_parsed_check.query}")
399
+
400
+ # CRASH IF URL IS STILL MALFORMED - DO NOT PROCEED
401
+ if final_parsed_check.query and "/" in final_parsed_check.query:
402
+ error_msg = (
403
+ f"FATAL [OpenAIClient]: URL IS STILL MALFORMED AFTER FIX ATTEMPT!\n"
404
+ f"Original base_url: {base_url or self.base_url}\n"
405
+ f"Constructed URL: {url}\n"
406
+ f"Path: {final_parsed_check.path}\n"
407
+ f"Query (contains path): {final_parsed_check.query}\n"
408
+ f"This will cause a 404 error. CRASHING NOW to prevent bad request."
409
+ )
410
+ logger.error(error_msg)
411
+ raise ValueError(error_msg)
412
+
413
+ # Verify path is correct
414
+ if not final_parsed_check.path.endswith("/v1/chat/completions"):
415
+ error_msg = (
416
+ f"FATAL [OpenAIClient]: URL missing /v1/chat/completions path!\n"
417
+ f"URL: {url}\n"
418
+ f"Path: {final_parsed_check.path}\n"
419
+ )
420
+ logger.error(error_msg)
421
+ raise ValueError(error_msg)
422
+
423
+ # Log request with detailed prompts/tools preview and sampling settings (Authorization is not logged)
237
424
  logger.info(f"Inference POST target: {url}")
238
425
  if extra_headers:
239
426
  logger.info(f"Extra headers: {extra_headers}")
240
427
  with contextlib.suppress(Exception):
241
428
  keys_preview = sorted(processed_request.keys())
242
429
  logger.info(f"Request keys: {keys_preview}")
243
- # DEBUG: Log message structure for vision debugging
244
- if "messages" in processed_request:
245
- msgs = processed_request["messages"]
246
- if isinstance(msgs, list):
247
- logger.debug(f"🔊 [OPENAI_CLIENT] Request has {len(msgs)} messages")
248
- for idx, msg in enumerate(msgs):
249
- if isinstance(msg, dict):
250
- role = msg.get("role")
251
- content = msg.get("content")
252
- if isinstance(content, list):
253
- logger.debug(f"🔊 [OPENAI_CLIENT] Message[{idx}] role={role}, content=list[{len(content)}]")
254
- for part_idx, part in enumerate(content):
255
- if isinstance(part, dict):
256
- part_type = part.get("type")
257
- logger.debug(f"🔊 [OPENAI_CLIENT] Part[{part_idx}]: type={part_type}")
430
+
431
+ # Detailed IO log: messages/tools/sampling and final payload fields
432
+ try:
433
+ import json as _json
434
+
435
+ def _truncate(text: str, limit: int = 2000) -> str:
436
+ return text if len(text) <= limit else text[:limit] + "…"
437
+
438
+ def _messages_preview(msgs: Any) -> str:
439
+ try:
440
+ out: list[dict[str, Any]] = []
441
+ if isinstance(msgs, list):
442
+ for m in msgs:
443
+ if not isinstance(m, dict):
444
+ continue
445
+ role = m.get("role")
446
+ content = m.get("content")
447
+ if isinstance(content, str):
448
+ text = content
449
+ elif isinstance(content, list):
450
+ parts: list[str] = []
451
+ for seg in content:
452
+ if isinstance(seg, dict) and isinstance(seg.get("text"), str):
453
+ parts.append(seg["text"])
454
+ text = "\n".join(parts)
258
455
  else:
259
- content_len = len(str(content)) if content else 0
260
- logger.debug(f"🔊 [OPENAI_CLIENT] Message[{idx}] role={role}, content_type={type(content).__name__}, len={content_len}")
456
+ text = ""
457
+ out.append({"role": role, "content": _truncate(str(text), 4000)})
458
+ return _json.dumps(out)
459
+ except Exception:
460
+ return "[]"
261
461
 
262
- # Final hard-guard for OpenAI: ensure unsupported field is not present
462
+ def _tools_preview(tools: Any) -> str:
463
+ try:
464
+ return _truncate(_json.dumps(tools), 4000)
465
+ except Exception:
466
+ return "[]"
467
+
468
+ msgs = processed_request.get("messages") if isinstance(processed_request, dict) else None
469
+ tools = processed_request.get("tools") if isinstance(processed_request, dict) else None
470
+ io_log: dict[str, Any] = {
471
+ "llm.call": True,
472
+ "model": processed_request.get("model") if isinstance(processed_request, dict) else None,
473
+ "tool_choice": processed_request.get("tool_choice") if isinstance(processed_request, dict) else None,
474
+ "parallel_tool_calls": processed_request.get("parallel_tool_calls") if isinstance(processed_request, dict) else None,
475
+ "stop_after_tool_calls": processed_request.get("stop_after_tool_calls") if isinstance(processed_request, dict) else None,
476
+ "temperature": processed_request.get("temperature") if isinstance(processed_request, dict) else None,
477
+ "top_p": processed_request.get("top_p") if isinstance(processed_request, dict) else None,
478
+ "max_tokens": processed_request.get("max_tokens") if isinstance(processed_request, dict) else None,
479
+ "max_completion_tokens": processed_request.get("max_completion_tokens") if isinstance(processed_request, dict) else None,
480
+ "messages_preview": _messages_preview(msgs),
481
+ "tools_preview": _tools_preview(tools),
482
+ }
483
+ logger.info(io_log)
484
+ except Exception:
485
+ pass
486
+
487
+ # Final hard-guard for OpenAI/Groq: drop unsupported field
263
488
  try:
264
- if "openai" in url.lower() and "stop_after_tool_calls" in processed_request:
489
+ low_url = url.lower()
490
+ if ("openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url) and "stop_after_tool_calls" in processed_request:
265
491
  processed_request.pop("stop_after_tool_calls", None)
266
- logger.info("Removed stop_after_tool_calls for OpenAI request")
492
+ logger.info("Removed stop_after_tool_calls for %s request", "Groq/OpenAI")
267
493
  # Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
268
- low_url = url.lower()
269
494
  if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
270
495
  processed_request, dict
271
496
  ):
@@ -330,10 +555,70 @@ class OpenAIClient:
330
555
  logger.info(
331
556
  f"Inference response status=200, content-type={content_type}, bytes={len(body_text)}"
332
557
  )
333
- # Do not log prompt or full response body
558
+ if body_text:
559
+ # Log raw output with generous preview to debug no-tool-call issues
560
+ preview_len = min(4000, len(body_text))
561
+ logger.info({
562
+ "llm.raw_response": True,
563
+ "bytes": len(body_text),
564
+ "preview": body_text[:preview_len],
565
+ })
334
566
 
335
567
  result = response.json()
336
568
  logger.info(f"Inference response parsed_type={type(result).__name__}")
569
+
570
+ tool_call_count = -1
571
+ # Normalize tool calls so downstream always sees a function tool call
572
+ try:
573
+ if isinstance(result, dict):
574
+ choices = result.get("choices")
575
+ if isinstance(choices, list) and choices:
576
+ msg = choices[0].get("message")
577
+ if isinstance(msg, dict):
578
+ # Prefer tool_calls; if missing but function_call is present, synthesize tool_calls
579
+ tc = msg.get("tool_calls")
580
+ fc = msg.get("function_call")
581
+ if (not isinstance(tc, list) or not tc) and isinstance(fc, dict):
582
+ name = fc.get("name") or "interact_many"
583
+ args = fc.get("arguments") or "{}"
584
+ msg["tool_calls"] = [
585
+ {
586
+ "id": "call_norm",
587
+ "type": "function",
588
+ "function": {"name": name, "arguments": args},
589
+ }
590
+ ]
591
+ if isinstance(choices[0], dict):
592
+ choices[0]["finish_reason"] = "tool_calls"
593
+ # Log tool call count for debugging
594
+ try:
595
+ tc2 = msg.get("tool_calls")
596
+ count = len(tc2) if isinstance(tc2, list) else 0
597
+ logger.info({
598
+ "llm.tool_calls": True,
599
+ "count": count,
600
+ "finish_reason": choices[0].get("finish_reason") if isinstance(choices[0], dict) else None,
601
+ })
602
+ if count == 0:
603
+ click.echo(
604
+ "[openai-client] ✗ upstream response missing tool_calls; dumping preview to logs",
605
+ err=True,
606
+ )
607
+ logger.error(
608
+ "Inference response missing tool_calls; failing fast. Raw body preview: %s",
609
+ body_text[:500] if body_text else "<empty>",
610
+ )
611
+ raise ValueError("Inference response missing tool_calls")
612
+ tool_call_count = count
613
+ except Exception:
614
+ pass
615
+ except Exception:
616
+ pass
617
+
618
+ click.echo(
619
+ f"[openai-client] ✓ response ok with tool_calls={tool_call_count}",
620
+ err=True,
621
+ )
337
622
  return result
338
623
 
339
624
  except httpx.TimeoutException:
@@ -342,11 +627,31 @@ class OpenAIClient:
342
627
  except httpx.HTTPStatusError as e:
343
628
  status = e.response.status_code if e.response is not None else None
344
629
  text = e.response.text if e.response is not None else str(e)
345
- # Log minimal error info only
346
- logger.error({"openai_http_error": True, "status": status})
347
- # For 4xx/5xx, print full sanitized request to aid debugging (especially Groq 400s)
348
- # Suppress prompt/payload logging entirely
349
- # Special case: token budget exceeded (OpenAI-compatible error schema)
630
+ # Log full body and request diagnostics for debugging remote failures
631
+ try:
632
+ redacted_headers = dict(headers)
633
+ if "Authorization" in redacted_headers:
634
+ redacted_headers["Authorization"] = "***REDACTED***"
635
+ logger.error(
636
+ {
637
+ "openai_http_error": True,
638
+ "status": status,
639
+ "url": url,
640
+ "body": text,
641
+ }
642
+ )
643
+ logger.error(
644
+ {
645
+ "request_debug": True,
646
+ "status": status,
647
+ "target": url,
648
+ "headers": redacted_headers,
649
+ "payload": processed_request,
650
+ }
651
+ )
652
+ except Exception:
653
+ logger.error(f"HTTP error from {url}: {status} - {text}")
654
+ # Special case: token budget exceeded is handled below; any other HTTP error is re-raised
350
655
  try:
351
656
  if status == 400 and e.response is not None:
352
657
  data = e.response.json()
@@ -399,6 +704,8 @@ class OpenAIClient:
399
704
  logger.warning(
400
705
  {
401
706
  "token_budget_recovery": True,
707
+ "messages_tokens": messages_tokens,
708
+ "model_limit": model_limit,
402
709
  "retry_max_tokens": new_max,
403
710
  }
404
711
  )
@@ -413,35 +720,6 @@ class OpenAIClient:
413
720
  pass
414
721
  except Exception:
415
722
  pass
416
- # Gracefully degrade on 422 so rollouts can still produce a trajectory
417
- if status == 422:
418
- try:
419
- # Best-effort parse of error for diagnostics
420
- err = None
421
- try:
422
- err = e.response.json()
423
- except Exception:
424
- err = {"error": "unprocessable"}
425
- logger.warning({"inference_422_recovered": True})
426
- except Exception:
427
- pass
428
- # Return a minimal OpenAI-compatible response with no tool_calls/content
429
- import time as _t
430
-
431
- return {
432
- "id": f"cmpl-{int(_t.time())}",
433
- "object": "chat.completion",
434
- "created": int(_t.time()),
435
- "model": processed_request.get("model") or "unknown",
436
- "choices": [
437
- {
438
- "index": 0,
439
- "message": {"role": "assistant", "content": "", "tool_calls": []},
440
- "finish_reason": "stop",
441
- }
442
- ],
443
- "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
444
- }
445
723
  raise
446
724
  except Exception as e:
447
725
  logger.error(f"Unexpected error calling {url}: {e}")
@@ -507,14 +785,29 @@ class OpenAIClient:
507
785
  OpenAI-compatible chat completion response
508
786
  """
509
787
  last_error = None
788
+ processed_request: dict[str, Any] = dict(request or {})
510
789
  wait_time = 1.0
511
790
 
512
791
  for attempt in range(max_retries + 1):
513
792
  try:
514
793
  # Apply parameter fixes to the request
794
+ # CRITICAL: Use proper URL parsing, not string concatenation!
795
+ target_base = base_url or self.base_url
796
+ if target_base:
797
+ parsed_target = urlparse(target_base)
798
+ target_path = parsed_target.path.rstrip("/")
799
+ if not target_path.endswith("/v1/chat/completions"):
800
+ new_target_path = f"{target_path}/v1/chat/completions" if target_path else "/v1/chat/completions"
801
+ parsed_target = parsed_target._replace(path=new_target_path)
802
+ target_url = urlunparse(parsed_target)
803
+ else:
804
+ target_url = target_base
805
+ else:
806
+ target_url = None
807
+
515
808
  processed_request = self._fix_model_parameters(
516
809
  request,
517
- target_url=(base_url or self.base_url).rstrip("/") + "/v1/chat/completions",
810
+ target_url=target_url,
518
811
  )
519
812
  return await self.generate(
520
813
  request=processed_request,
@@ -546,47 +839,16 @@ class OpenAIClient:
546
839
  error_block.get("code") or error_block.get("type") or ""
547
840
  ).lower()
548
841
  if error_code in {"tool_use_failed", "tool_call_failed"}:
549
- logger.warning(
842
+ logger.error(
550
843
  {
551
844
  "tool_use_failed": True,
552
845
  "target": (base_url or self.base_url),
553
846
  "message": error_block.get("message") if isinstance(error_block, dict) else None,
554
847
  }
555
848
  )
556
- fallback_actions = ["move_right", "move_up", "do"]
557
- fallback_response = {
558
- "id": f"fallback-{int(time.time() * 1000)}",
559
- "object": "chat.completion",
560
- "created": int(time.time()),
561
- "model": processed_request.get("model"),
562
- "choices": [
563
- {
564
- "index": 0,
565
- "message": {
566
- "role": "assistant",
567
- "content": "",
568
- "tool_calls": [
569
- {
570
- "id": f"call_fallback_{int(time.time() * 1000)}",
571
- "type": "function",
572
- "function": {
573
- "name": "interact_many",
574
- "arguments": json.dumps(
575
- {"actions": fallback_actions}
576
- ),
577
- },
578
- }
579
- ],
580
- },
581
- "finish_reason": "tool_calls",
582
- }
583
- ],
584
- }
585
- if isinstance(response_data.get("usage"), dict):
586
- fallback_response["usage"] = response_data["usage"]
587
- if isinstance(error_block, dict):
588
- fallback_response["error"] = error_block
589
- return fallback_response
849
+ raise RuntimeError(
850
+ f"Inference 400 response (tool call failed): {error_block.get('message') if isinstance(error_block, dict) else 'Tool call failed'}"
851
+ ) from e
590
852
  # This is a different type of 400 error, don't retry
591
853
  try:
592
854
  redacted_headers = {}
@@ -651,7 +913,9 @@ class OpenAIClient:
651
913
  await asyncio.sleep(wait_time)
652
914
  wait_time *= backoff_factor
653
915
 
654
- raise last_error
916
+ if last_error is not None:
917
+ raise last_error
918
+ raise RuntimeError("RL inference retries exhausted with no captured exception")
655
919
 
656
920
 
657
921
  def create_inference_client(
@@ -726,7 +990,8 @@ def create_inference_client(
726
990
  ) -> dict[str, Any]:
727
991
  return {"status": "ok", "dummy": True}
728
992
 
729
- return _DummyClient()
993
+ import typing as _t
994
+ return _t.cast(OpenAIClient, _DummyClient())
730
995
 
731
996
  return OpenAIClient(
732
997
  base_url=task_app.vllm_base_url,