synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic; consult the registry's security advisory for more details.

Files changed (299)
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/baseline/banking77_baseline.py +204 -0
  3. examples/baseline/crafter_baseline.py +407 -0
  4. examples/baseline/pokemon_red_baseline.py +326 -0
  5. examples/baseline/simple_baseline.py +56 -0
  6. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  7. examples/blog_posts/gepa/README.md +355 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  9. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  10. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  13. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  15. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  16. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  18. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  19. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  20. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  21. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  22. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  23. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  24. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  25. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  26. examples/blog_posts/gepa/task_apps.py +105 -0
  27. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  28. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  29. examples/blog_posts/pokemon_vl/README.md +98 -0
  30. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  31. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  32. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  33. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  34. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  35. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  36. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  37. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  38. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  39. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  40. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  41. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  42. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  43. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  44. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  45. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  46. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  47. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  48. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  49. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  50. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  51. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  52. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  53. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  54. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  55. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  56. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  57. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  58. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  59. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  60. examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
  61. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  62. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
  63. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
  64. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  65. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  66. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  67. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  68. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  69. examples/qwen_vl/README.md +10 -12
  70. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  71. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  72. examples/qwen_vl/collect_data_via_cli.md +76 -84
  73. examples/qwen_vl/collect_vision_traces.py +4 -4
  74. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  75. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  76. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  77. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  78. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  79. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  80. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  81. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  82. examples/qwen_vl/run_vision_comparison.sh +6 -7
  83. examples/rl/README.md +5 -5
  84. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  85. examples/rl/configs/rl_from_base_qwen17.toml +6 -2
  86. examples/rl/task_app/README.md +1 -2
  87. examples/rl/task_app/math_single_step.py +2 -2
  88. examples/run_crafter_demo.sh +2 -2
  89. examples/sft/README.md +1 -1
  90. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  91. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  92. examples/swe/task_app/README.md +32 -2
  93. examples/swe/task_app/grpo_swe_mini.py +4 -0
  94. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  95. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  96. examples/swe/task_app/hosted/inference/openai_client.py +4 -38
  97. examples/swe/task_app/hosted/policy_routes.py +17 -0
  98. examples/swe/task_app/hosted/rollout.py +4 -2
  99. examples/swe/task_app/morph_backend.py +178 -0
  100. examples/task_apps/banking77/__init__.py +6 -0
  101. examples/task_apps/banking77/banking77_task_app.py +841 -0
  102. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  103. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  104. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  105. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  106. examples/task_apps/crafter/task_app/README.md +1 -1
  107. examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
  108. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  109. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  110. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  111. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  112. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
  113. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
  114. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
  115. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  116. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  117. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  118. examples/task_apps/gepa_benchmarks/common.py +260 -0
  119. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  120. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  121. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  122. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  123. examples/task_apps/math/README.md +1 -2
  124. examples/task_apps/pokemon_red/README.md +3 -4
  125. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  126. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  127. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  128. examples/task_apps/pokemon_red/task_app.py +288 -39
  129. examples/task_apps/sokoban/README.md +2 -3
  130. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  131. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  132. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  133. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  134. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  135. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
  136. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  137. examples/warming_up_to_rl/task_app/README.md +1 -1
  138. examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
  139. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  147. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  148. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
  149. synth_ai/api/train/builders.py +99 -4
  150. synth_ai/api/train/cli.py +516 -26
  151. synth_ai/api/train/config_finder.py +13 -2
  152. synth_ai/api/train/configs/__init__.py +23 -2
  153. synth_ai/api/train/configs/prompt_learning.py +442 -0
  154. synth_ai/api/train/configs/rl.py +61 -7
  155. synth_ai/api/train/configs/sft.py +6 -2
  156. synth_ai/api/train/configs/shared.py +59 -2
  157. synth_ai/api/train/task_app.py +1 -1
  158. synth_ai/api/train/validators.py +277 -0
  159. synth_ai/auth/credentials.py +119 -0
  160. synth_ai/baseline/__init__.py +25 -0
  161. synth_ai/baseline/config.py +209 -0
  162. synth_ai/baseline/discovery.py +214 -0
  163. synth_ai/baseline/execution.py +146 -0
  164. synth_ai/cli/__init__.py +94 -18
  165. synth_ai/cli/__main__.py +0 -0
  166. synth_ai/cli/claude.py +70 -0
  167. synth_ai/cli/codex.py +84 -0
  168. synth_ai/cli/commands/__init__.py +18 -0
  169. synth_ai/cli/commands/baseline/__init__.py +12 -0
  170. synth_ai/cli/commands/baseline/core.py +637 -0
  171. synth_ai/cli/commands/baseline/list.py +93 -0
  172. synth_ai/cli/commands/demo/__init__.py +6 -0
  173. synth_ai/cli/commands/demo/core.py +163 -0
  174. synth_ai/cli/commands/eval/__init__.py +19 -0
  175. synth_ai/cli/commands/eval/core.py +1112 -0
  176. synth_ai/cli/commands/eval/errors.py +81 -0
  177. synth_ai/cli/commands/eval/validation.py +133 -0
  178. synth_ai/cli/commands/filter/__init__.py +12 -0
  179. synth_ai/cli/commands/filter/core.py +424 -0
  180. synth_ai/cli/commands/filter/errors.py +55 -0
  181. synth_ai/cli/commands/filter/validation.py +77 -0
  182. synth_ai/cli/commands/help/__init__.py +177 -0
  183. synth_ai/cli/commands/help/core.py +72 -0
  184. synth_ai/cli/commands/smoke/__init__.py +7 -0
  185. synth_ai/cli/commands/smoke/core.py +1436 -0
  186. synth_ai/cli/commands/status/__init__.py +64 -0
  187. synth_ai/cli/commands/status/client.py +192 -0
  188. synth_ai/cli/commands/status/config.py +92 -0
  189. synth_ai/cli/commands/status/errors.py +20 -0
  190. synth_ai/cli/commands/status/formatters.py +164 -0
  191. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  192. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  193. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  194. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  195. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  196. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  197. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  198. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  199. synth_ai/cli/commands/status/utils.py +114 -0
  200. synth_ai/cli/commands/train/__init__.py +53 -0
  201. synth_ai/cli/commands/train/core.py +21 -0
  202. synth_ai/cli/commands/train/errors.py +117 -0
  203. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  204. synth_ai/cli/commands/train/judge_validation.py +305 -0
  205. synth_ai/cli/commands/train/validation.py +386 -0
  206. synth_ai/cli/demo.py +30 -158
  207. synth_ai/cli/deploy/__init__.py +43 -0
  208. synth_ai/cli/deploy.py +162 -0
  209. synth_ai/cli/eval/__init__.py +36 -0
  210. synth_ai/cli/eval/core.py +5 -0
  211. synth_ai/cli/eval/errors.py +31 -0
  212. synth_ai/cli/eval/validation.py +5 -0
  213. synth_ai/cli/filter/__init__.py +28 -0
  214. synth_ai/cli/filter/core.py +5 -0
  215. synth_ai/cli/filter/errors.py +23 -0
  216. synth_ai/cli/filter/validation.py +5 -0
  217. synth_ai/cli/legacy_root_backup.py +14 -8
  218. synth_ai/cli/modal_serve/__init__.py +12 -0
  219. synth_ai/cli/modal_serve/core.py +14 -0
  220. synth_ai/cli/modal_serve/errors.py +8 -0
  221. synth_ai/cli/modal_serve/validation.py +11 -0
  222. synth_ai/cli/opencode.py +107 -0
  223. synth_ai/cli/root.py +9 -5
  224. synth_ai/cli/serve/__init__.py +12 -0
  225. synth_ai/cli/serve/core.py +14 -0
  226. synth_ai/cli/serve/errors.py +8 -0
  227. synth_ai/cli/serve/validation.py +11 -0
  228. synth_ai/cli/setup.py +20 -265
  229. synth_ai/cli/status.py +7 -126
  230. synth_ai/cli/task_app_deploy.py +1 -10
  231. synth_ai/cli/task_app_modal_serve.py +4 -9
  232. synth_ai/cli/task_app_serve.py +4 -11
  233. synth_ai/cli/task_apps.py +51 -1480
  234. synth_ai/cli/train/__init__.py +12 -0
  235. synth_ai/cli/train/core.py +21 -0
  236. synth_ai/cli/train/errors.py +8 -0
  237. synth_ai/cli/train/validation.py +24 -0
  238. synth_ai/cli/train.py +1 -14
  239. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  240. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  241. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  242. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  243. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  244. synth_ai/environments/examples/red/engine.py +33 -12
  245. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  246. synth_ai/environments/examples/red/environment.py +26 -0
  247. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  248. synth_ai/http.py +12 -0
  249. synth_ai/judge_schemas.py +10 -10
  250. synth_ai/learning/__init__.py +10 -0
  251. synth_ai/learning/prompt_learning_client.py +276 -0
  252. synth_ai/learning/prompt_learning_types.py +184 -0
  253. synth_ai/learning/rl/client.py +3 -1
  254. synth_ai/pricing/__init__.py +2 -0
  255. synth_ai/pricing/model_pricing.py +57 -0
  256. synth_ai/streaming/__init__.py +29 -0
  257. synth_ai/streaming/config.py +94 -0
  258. synth_ai/streaming/handlers.py +518 -0
  259. synth_ai/streaming/streamer.py +320 -0
  260. synth_ai/streaming/types.py +95 -0
  261. synth_ai/task/apps/__init__.py +1 -0
  262. synth_ai/task/config.py +2 -0
  263. synth_ai/task/tracing_utils.py +25 -25
  264. synth_ai/task/validators.py +45 -9
  265. synth_ai/task_app_cfgs.py +21 -0
  266. synth_ai/tracing_v3/config.py +162 -19
  267. synth_ai/tracing_v3/constants.py +1 -1
  268. synth_ai/tracing_v3/db_config.py +24 -38
  269. synth_ai/tracing_v3/migration_helper.py +1 -2
  270. synth_ai/tracing_v3/storage/config.py +47 -13
  271. synth_ai/tracing_v3/storage/factory.py +3 -3
  272. synth_ai/tracing_v3/turso/daemon.py +113 -11
  273. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  274. synth_ai/types.py +8 -0
  275. synth_ai/urls.py +11 -0
  276. synth_ai/utils/__init__.py +30 -1
  277. synth_ai/utils/agents.py +74 -0
  278. synth_ai/utils/bin.py +39 -0
  279. synth_ai/utils/cli.py +149 -5
  280. synth_ai/utils/env.py +40 -33
  281. synth_ai/utils/http.py +4 -1
  282. synth_ai/utils/json.py +72 -0
  283. synth_ai/utils/modal.py +285 -3
  284. synth_ai/utils/paths.py +48 -0
  285. synth_ai/utils/uvicorn.py +113 -0
  286. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
  287. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
  288. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  289. synth_ai/cli/tui.py +0 -62
  290. synth_ai/tui/__init__.py +0 -5
  291. synth_ai/tui/__main__.py +0 -13
  292. synth_ai/tui/cli/__init__.py +0 -1
  293. synth_ai/tui/cli/query_experiments.py +0 -164
  294. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  295. synth_ai/tui/dashboard.py +0 -911
  296. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  297. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  298. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  299. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
@@ -5,14 +5,21 @@ from __future__ import annotations
5
5
  import logging
6
6
  import os
7
7
  import sys
8
+ from urllib.parse import parse_qs, urlparse
8
9
  from collections.abc import Iterable, Sequence
9
10
  from contextlib import suppress
10
11
  from dataclasses import dataclass
12
+ from datetime import UTC, datetime
11
13
  from pathlib import Path
12
14
  from typing import Any
13
15
 
16
+ from fastapi import HTTPException
17
+ from pydantic import BaseModel
18
+
19
+ from pydantic import BaseModel
20
+
14
21
  from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
15
- from synth_ai.task.contracts import RolloutMetrics, RolloutRequest, RolloutResponse, TaskInfo
22
+ from synth_ai.task.contracts import RolloutMetrics, RolloutMode, RolloutRequest, RolloutResponse, TaskInfo
16
23
  from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
17
24
  from synth_ai.task.json import to_jsonable # noqa: F401 (imported for side-effect compatibility)
18
25
  from synth_ai.task.rubrics import load_rubric
@@ -115,6 +122,27 @@ try:
115
122
  except Exception:
116
123
  pass
117
124
 
125
+ try:
126
+ from .synth_envs_hosted.utils import (
127
+ ensure_chat_completions_url,
128
+ extract_trace_correlation_id,
129
+ )
130
+ except Exception: # pragma: no cover - fallback when optional deps missing
131
+ def ensure_chat_completions_url(raw_url, mode=None):
132
+ return raw_url
133
+
134
+ def extract_trace_correlation_id(_raw_url, mode=None):
135
+ if not isinstance(_raw_url, str):
136
+ return None
137
+ parsed = urlparse(_raw_url)
138
+ query_params = parse_qs(parsed.query or "")
139
+ for key in ("cid", "trace", "trace_correlation_id"):
140
+ values = query_params.get(key) or []
141
+ for value in values:
142
+ if isinstance(value, str) and value.strip():
143
+ return value.strip()
144
+ return None
145
+
118
146
  HAS_HOSTED = True
119
147
  try:
120
148
  import crafter # type: ignore
@@ -306,7 +334,7 @@ def build_dataset() -> tuple[TaskDatasetRegistry, CrafterDataset]:
306
334
  def _base_task_info(dataset: CrafterDataset) -> TaskInfo:
307
335
  return TaskInfo(
308
336
  task={"id": "crafter_classic", "name": "Crafter Classic", "version": "1.0.0"},
309
- environments=["crafter"],
337
+ environment="crafter",
310
338
  action_space={
311
339
  "type": "discrete",
312
340
  "size": len(crafter_constants.actions),
@@ -397,22 +425,29 @@ def provide_task_instances(
397
425
  dataset: CrafterDataset, base_info: TaskInfo, seeds: Sequence[int]
398
426
  ) -> Iterable[TaskInfo]:
399
427
  infos: list[TaskInfo] = []
428
+ base_observation = getattr(base_info, "observation", None)
429
+ if hasattr(base_observation, "model_dump"):
430
+ observation_template = base_observation.model_dump()
431
+ elif isinstance(base_observation, dict):
432
+ observation_template = dict(base_observation)
433
+ else:
434
+ observation_template = {}
400
435
  for seed_value in seeds:
401
436
  summary = dataset.describe_seed(seed_value)
402
437
  infos.append(
403
438
  TaskInfo(
404
439
  task=base_info.task,
405
- environments=base_info.environments,
440
+ environment=base_info.environment,
406
441
  action_space=base_info.action_space,
407
442
  observation={
408
- **base_info.observation,
443
+ **observation_template,
409
444
  "seed": seed_value,
410
445
  "traits": summary["traits"],
411
446
  "inventory": summary["inventory"],
412
447
  "player_position": summary["player_position"],
413
448
  },
414
449
  dataset={
415
- **base_info.dataset,
450
+ **base_info.dataset.model_dump(),
416
451
  "seed": seed_value,
417
452
  "difficulty": summary["difficulty"],
418
453
  "config": summary["config"],
@@ -536,7 +571,47 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
536
571
 
537
572
  request = _coerce_math_to_crafter(request)
538
573
 
574
+ record_cfg = request.record.model_copy(
575
+ update={
576
+ "return_trace": True,
577
+ "trace_format": "structured",
578
+ }
579
+ )
580
+ request = request.model_copy(update={"record": record_cfg})
581
+
539
582
  policy_cfg = dict(request.policy.config or {})
583
+ logger.info(
584
+ "ROLLOUT_EXEC: incoming policy config keys=%s inference_url=%s run_id=%s mode=%s",
585
+ sorted(policy_cfg.keys()),
586
+ policy_cfg.get("inference_url"),
587
+ request.run_id,
588
+ request.mode,
589
+ )
590
+ inferred_url = ensure_chat_completions_url(policy_cfg.get("inference_url"), mode=request.mode)
591
+ if isinstance(inferred_url, str) and inferred_url:
592
+ policy_cfg["inference_url"] = inferred_url
593
+ else:
594
+ logger.warning(
595
+ "ROLLOUT_EXEC: inference_url missing or not normalized run_id=%s raw=%s",
596
+ request.run_id,
597
+ policy_cfg.get("inference_url"),
598
+ )
599
+
600
+ trace_correlation_id = extract_trace_correlation_id(policy_cfg.get("inference_url"), mode=request.mode)
601
+ if request.mode == RolloutMode.RL:
602
+ assert trace_correlation_id, (
603
+ f"FATAL: trace_correlation_id extraction failed for run_id={request.run_id}. "
604
+ f"policy_cfg_keys={sorted(policy_cfg.keys())} inference_url={policy_cfg.get('inference_url')}"
605
+ )
606
+ if trace_correlation_id:
607
+ policy_cfg["trace_correlation_id"] = trace_correlation_id
608
+
609
+ pipeline_metadata: dict[str, Any] = {}
610
+ if trace_correlation_id:
611
+ pipeline_metadata["trace_correlation_id"] = trace_correlation_id
612
+ if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
613
+ pipeline_metadata.setdefault("inference_url", policy_cfg["inference_url"])
614
+
540
615
  try:
541
616
  max_llm_calls = int(policy_cfg.get("max_llm_calls") or 10)
542
617
  except Exception:
@@ -585,17 +660,122 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
585
660
  safety=LegacyRolloutSafetyConfig(**request.safety.model_dump()),
586
661
  training_session_id=request.training_session_id,
587
662
  synth_base_url=request.synth_base_url,
663
+ mode=request.mode,
588
664
  )
589
665
 
590
666
  legacy_response: LegacyRolloutResponse = await legacy_execute_rollout(
591
667
  legacy_request, fastapi_request
592
668
  )
593
669
  data = legacy_response.model_dump()
670
+ logger.debug(
671
+ "ROLLOUT_EXEC: legacy response keys=%s has_trace=%s",
672
+ sorted(data.keys()),
673
+ bool(data.get("trace")),
674
+ )
594
675
  metrics = data.get("metrics", {}) or {}
595
676
  metrics.setdefault("outcome_score", None)
596
677
  metrics.setdefault("events_score", None)
597
678
  metrics.setdefault("details", {})
598
679
  data["metrics"] = metrics
680
+
681
+ if data.get("trace") is None:
682
+ legacy_trace = getattr(legacy_response, "trace", None)
683
+ if legacy_trace is not None:
684
+ data["trace"] = legacy_trace
685
+ else:
686
+ tracer_factory = getattr(fastapi_request.app.state, "session_tracer_factory", None)
687
+ if callable(tracer_factory):
688
+ tracer = tracer_factory()
689
+ logger.debug(
690
+ "ROLLOUT_EXEC: trace backfill factory=%s", type(tracer)
691
+ )
692
+ if isinstance(tracer, SessionTracer):
693
+ try:
694
+ await tracer.initialize()
695
+ if tracer.db is not None:
696
+ trace_row = await tracer.db.get_session_trace(request.run_id)
697
+ if trace_row is not None:
698
+ data["trace"] = trace_row
699
+ except Exception as exc:
700
+ logger.warning("TRACE_BACKFILL_FAIL: %s", exc)
701
+ finally:
702
+ with suppress(Exception):
703
+ await tracer.close()
704
+
705
+ final_cid = trace_correlation_id or f"trace_{request.run_id}"
706
+ data["trace_correlation_id"] = final_cid
707
+
708
+ existing_meta = data.get("pipeline_metadata")
709
+ if not isinstance(existing_meta, dict):
710
+ existing_meta = {}
711
+ existing_meta.setdefault("trace_correlation_id", final_cid)
712
+ if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
713
+ existing_meta.setdefault("inference_url", policy_cfg["inference_url"])
714
+ data["pipeline_metadata"] = existing_meta
715
+
716
+ # Propagate inference_url into each legacy trajectory entry for downstream tooling.
717
+ inferred_url = policy_cfg.get("inference_url")
718
+ # Normalize the url before propagating into trajectories
719
+ try:
720
+ from .synth_envs_hosted.utils import (
721
+ ensure_chat_completions_url as _ensure_cc,
722
+ force_normalize_chat_completions_url as _force_cc,
723
+ )
724
+ if isinstance(inferred_url, str) and inferred_url:
725
+ inferred_url = _force_cc(inferred_url)
726
+ inferred_url = _ensure_cc(inferred_url, mode=request.mode)
727
+ except Exception:
728
+ pass
729
+
730
+ if "trajectories" in data:
731
+ normalized_trajs: list[dict[str, Any]] = []
732
+ for traj in data.get("trajectories", []):
733
+ if isinstance(traj, BaseModel):
734
+ traj_dict = traj.model_dump()
735
+ elif isinstance(traj, dict):
736
+ traj_dict = dict(traj)
737
+ else:
738
+ continue
739
+ traj_dict.setdefault("trace_correlation_id", final_cid)
740
+ if isinstance(inferred_url, str) and inferred_url and not traj_dict.get("inference_url"):
741
+ traj_dict["inference_url"] = inferred_url
742
+
743
+ # Inject nested info.meta.inference_url for each step (required by RL trainer)
744
+ try:
745
+ steps = traj_dict.get("steps", [])
746
+ if isinstance(steps, list):
747
+ for step in steps:
748
+ if not isinstance(step, dict):
749
+ continue
750
+ info = step.get("info")
751
+ if not isinstance(info, dict):
752
+ info = {}
753
+ meta = info.get("meta")
754
+ if not isinstance(meta, dict):
755
+ meta = {}
756
+ if isinstance(inferred_url, str) and inferred_url and not meta.get("inference_url"):
757
+ meta["inference_url"] = inferred_url
758
+ info["meta"] = meta
759
+ step["info"] = info
760
+ except Exception:
761
+ pass
762
+
763
+ normalized_trajs.append(traj_dict)
764
+ if normalized_trajs:
765
+ data["trajectories"] = normalized_trajs
766
+
767
+ if data.get("trace") is None:
768
+ data["trace"] = {
769
+ "session_id": request.run_id,
770
+ "created_at": datetime.now(UTC).isoformat(),
771
+ "metadata": dict(existing_meta),
772
+ "event_history": [],
773
+ "markov_blanket_message_history": [],
774
+ }
775
+ raise HTTPException(
776
+ status_code=500, detail="trace_payload_missing: task app did not emit a SessionTrace"
777
+ )
778
+
599
779
  return RolloutResponse.model_validate(data)
600
780
 
601
781
 
@@ -3,7 +3,7 @@
3
3
  This module now delegates to the TaskAppConfig defined in the colocated example at
4
4
  `examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
5
5
  (running the file directly or targeting `fastapi_app` from external tooling). Prefer using
6
- `uvx synth-ai serve grpo-crafter` for local development and testing.
6
+ `uvx synth-ai deploy --runtime uvicorn grpo-crafter` for local development and testing.
7
7
  """
8
8
 
9
9
  from __future__ import annotations
@@ -148,8 +148,8 @@ class CrafterPolicy(Policy):
148
148
  if self.use_tools:
149
149
  payload["tools"] = TOOLS_SCHEMA
150
150
  payload["tool_choice"] = "required"
151
- # Ensure the inference server injects family-specific stop sequences
152
- # to terminate immediately after the first tool call for compliance.
151
+ payload["function_call"] = {"name": "interact_many"}
152
+ payload["parallel_tool_calls"] = False
153
153
  payload["stop_after_tool_calls"] = 1
154
154
  return payload
155
155
 
@@ -158,13 +158,7 @@ class CrafterPolicy(Policy):
158
158
  response: dict[str, Any],
159
159
  use_tools: bool = True,
160
160
  ) -> list[dict[str, Any]]:
161
- """Turn an inference response into environment tool calls.
162
-
163
- - If tools were used, expect tool_calls-compatible output and forward as-is
164
- in our simple JSON format: {"tool_name": str, "arguments": {...}}.
165
- - If no tools, parse plain-text actions using CrafterReActAgent parser and
166
- wrap them into a single interact_many tool call.
167
- """
161
+ """Turn an inference response into environment tool calls."""
168
162
  # First check if we got actual tool calls
169
163
  choices = response.get("choices", [])
170
164
  tool_calls: list[dict[str, Any]] = []
@@ -223,24 +217,6 @@ class CrafterPolicy(Policy):
223
217
  normalized.append(tc)
224
218
  return normalized
225
219
 
226
- # Otherwise, parse plain text content for actions
227
- text = ""
228
- for choice in choices:
229
- msg = choice.get("message", {})
230
- content = msg.get("content", "")
231
- if content:
232
- text = content
233
- break
234
-
235
- if text:
236
- # Try to parse actions from the text
237
- from .shared import parse_actions
238
-
239
- actions = parse_actions(text)
240
- if actions:
241
- # Wrap actions in interact_many tool call
242
- return [{"tool_name": "interact_many", "arguments": {"actions": actions}}]
243
-
244
220
  # No actions found
245
221
  return []
246
222
 
@@ -46,7 +46,7 @@ class CrafterReActAgent:
46
46
  "- Always return a single tool call: interact_many({actions: [...]})\n"
47
47
  "- Use 2–5 actions per call; prefer long movement sequences to explore.\n"
48
48
  "- Mix in 'do' only when it makes sense (tree, stone, animal, enemy nearby).\n"
49
- "- Do not spam the same exact sequence twice in a row—explore in varied directions.\n\n"
49
+ "\n"
50
50
  "Available actions: noop, move_up, move_down, move_left, move_right, do (interact), sleep, "
51
51
  "place_stone, place_table, place_furnace, place_plant, make_wood_pickaxe, make_stone_pickaxe, "
52
52
  "make_iron_pickaxe, make_wood_sword, make_stone_sword, make_iron_sword\n"
@@ -1,6 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import contextlib
4
+ import logging
4
5
  import os
5
6
 
6
7
  from fastapi import FastAPI
@@ -9,6 +10,52 @@ from fastapi.responses import JSONResponse
9
10
  from pydantic import BaseModel
10
11
  from starlette.requests import Request
11
12
 
13
+ logger = logging.getLogger(__name__)
14
+
15
+ _VERSION_LOGGED = False
16
+
17
+
18
+ def _resolve_task_app_version() -> str:
19
+ env_version = os.getenv("TASK_APP_VERSION")
20
+ if isinstance(env_version, str) and env_version.strip():
21
+ return env_version.strip()
22
+
23
+ try:
24
+ import importlib.metadata as importlib_metadata
25
+
26
+ pkg_version = importlib_metadata.version("synth-ai")
27
+ if isinstance(pkg_version, str) and pkg_version.strip():
28
+ return pkg_version.strip()
29
+ except Exception:
30
+ pass
31
+
32
+ try:
33
+ import synth_ai
34
+
35
+ attr_version = getattr(synth_ai, "__version__", None)
36
+ if isinstance(attr_version, str) and attr_version.strip():
37
+ return attr_version.strip()
38
+ except Exception:
39
+ pass
40
+
41
+ return "unknown"
42
+
43
+
44
+ def _log_task_app_version_once() -> None:
45
+ global _VERSION_LOGGED
46
+ if _VERSION_LOGGED:
47
+ return
48
+
49
+ version = _resolve_task_app_version()
50
+ build_id = os.getenv("TASK_APP_BUILD_ID")
51
+
52
+ if build_id:
53
+ logger.info("TASK_APP_VERSION: %s (build=%s)", version, build_id)
54
+ else:
55
+ logger.info("TASK_APP_VERSION: %s", version)
56
+
57
+ _VERSION_LOGGED = True
58
+
12
59
 
13
60
  class TaskApp:
14
61
  """Holds service configuration and shared state."""
@@ -56,6 +103,8 @@ def create_app(allowed_environments: list[str] = None) -> FastAPI:
56
103
  allow_headers=["*"],
57
104
  )
58
105
 
106
+ _log_task_app_version_once()
107
+
59
108
  # Initialize task app configuration
60
109
  task_app = TaskApp()
61
110
  app.state.task_app = task_app
@@ -3,6 +3,7 @@ from __future__ import annotations
3
3
  import asyncio
4
4
  import contextlib
5
5
  import logging
6
+ import os
6
7
  from typing import Any
7
8
 
8
9
  import httpx
@@ -23,6 +24,15 @@ class OpenAIClient:
23
24
  self.api_key = api_key
24
25
  self.timeout_s = timeout_s
25
26
  self.headers = {}
27
+ self._env_api_key: str | None = None
28
+
29
+ try:
30
+ env_key = os.getenv("ENVIRONMENT_API_KEY") or ""
31
+ env_key = env_key.strip()
32
+ if env_key:
33
+ self._env_api_key = env_key
34
+ except Exception:
35
+ self._env_api_key = None
26
36
 
27
37
  if api_key:
28
38
  self.headers["Authorization"] = f"Bearer {api_key}"
@@ -137,18 +147,49 @@ class OpenAIClient:
137
147
  Returns:
138
148
  OpenAI-compatible chat completion response
139
149
  """
140
- url = (base_url or self.base_url).rstrip("/") + "/v1/chat/completions"
150
+ # Build target URL robustly: if a full endpoint is given (with query or already ending
151
+ # in /chat/completions), preserve it; otherwise, append the path BEFORE query params.
152
+ from urllib.parse import urlparse, urlunparse
153
+
154
+ candidate = (base_url or self.base_url).strip()
155
+ try:
156
+ parsed = urlparse(candidate)
157
+ # If no scheme, treat as relative base (pass-through)
158
+ if not parsed.scheme or not parsed.netloc:
159
+ base_no_slash = candidate.rstrip("/")
160
+ url = f"{base_no_slash}/v1/chat/completions"
161
+ else:
162
+ path = (parsed.path or "").rstrip("/")
163
+ if path.endswith("/v1/chat/completions") or path.endswith("/chat/completions"):
164
+ new_path = path
165
+ elif path.endswith("/v1"):
166
+ new_path = f"{path}/chat/completions"
167
+ elif path.endswith("/chat"):
168
+ new_path = f"{path}/completions"
169
+ else:
170
+ new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
171
+ url = urlunparse(parsed._replace(path=new_path))
172
+ except Exception:
173
+ # Fallback to legacy behavior
174
+ url = (base_url or self.base_url).rstrip("/") + "/v1/chat/completions"
141
175
  timeout = timeout_s or self.timeout_s
142
176
 
143
177
  # Merge headers
144
178
  headers = self.headers.copy()
179
+ try:
180
+ parsed_target = urlparse(url)
181
+ path_for_auth = (parsed_target.path or "") if parsed_target else ""
182
+ if self._env_api_key and "/proxy/" in path_for_auth:
183
+ headers.setdefault("X-API-Key", self._env_api_key)
184
+ except Exception:
185
+ pass
145
186
  if extra_headers:
146
187
  headers.update(extra_headers)
147
188
 
148
189
  # Fix parameter compatibility for newer models
149
190
  processed_request = self._fix_model_parameters(request, target_url=url)
150
191
 
151
- # Log request (redact messages in production)
192
+ # Log request with detailed prompts/tools preview and sampling settings (Authorization is not logged)
152
193
  logger.info(f"Inference POST target: {url}")
153
194
  if extra_headers:
154
195
  logger.info(f"Extra headers: {extra_headers}")
@@ -156,13 +197,69 @@ class OpenAIClient:
156
197
  keys_preview = sorted(processed_request.keys())
157
198
  logger.info(f"Request keys: {keys_preview}")
158
199
 
159
- # Final hard-guard for OpenAI: ensure unsupported field is not present
200
+ # Detailed IO log: messages/tools/sampling and final payload fields
201
+ try:
202
+ import json as _json
203
+
204
+ def _truncate(text: str, limit: int = 2000) -> str:
205
+ return text if len(text) <= limit else text[:limit] + "…"
206
+
207
+ def _messages_preview(msgs: Any) -> str:
208
+ try:
209
+ out: list[dict[str, Any]] = []
210
+ if isinstance(msgs, list):
211
+ for m in msgs:
212
+ if not isinstance(m, dict):
213
+ continue
214
+ role = m.get("role")
215
+ content = m.get("content")
216
+ if isinstance(content, str):
217
+ text = content
218
+ elif isinstance(content, list):
219
+ parts: list[str] = []
220
+ for seg in content:
221
+ if isinstance(seg, dict) and isinstance(seg.get("text"), str):
222
+ parts.append(seg["text"])
223
+ text = "\n".join(parts)
224
+ else:
225
+ text = ""
226
+ out.append({"role": role, "content": _truncate(str(text), 4000)})
227
+ return _json.dumps(out)
228
+ except Exception:
229
+ return "[]"
230
+
231
+ def _tools_preview(tools: Any) -> str:
232
+ try:
233
+ return _truncate(_json.dumps(tools), 4000)
234
+ except Exception:
235
+ return "[]"
236
+
237
+ msgs = processed_request.get("messages") if isinstance(processed_request, dict) else None
238
+ tools = processed_request.get("tools") if isinstance(processed_request, dict) else None
239
+ io_log: dict[str, Any] = {
240
+ "llm.call": True,
241
+ "model": processed_request.get("model") if isinstance(processed_request, dict) else None,
242
+ "tool_choice": processed_request.get("tool_choice") if isinstance(processed_request, dict) else None,
243
+ "parallel_tool_calls": processed_request.get("parallel_tool_calls") if isinstance(processed_request, dict) else None,
244
+ "stop_after_tool_calls": processed_request.get("stop_after_tool_calls") if isinstance(processed_request, dict) else None,
245
+ "temperature": processed_request.get("temperature") if isinstance(processed_request, dict) else None,
246
+ "top_p": processed_request.get("top_p") if isinstance(processed_request, dict) else None,
247
+ "max_tokens": processed_request.get("max_tokens") if isinstance(processed_request, dict) else None,
248
+ "max_completion_tokens": processed_request.get("max_completion_tokens") if isinstance(processed_request, dict) else None,
249
+ "messages_preview": _messages_preview(msgs),
250
+ "tools_preview": _tools_preview(tools),
251
+ }
252
+ logger.info(io_log)
253
+ except Exception:
254
+ pass
255
+
256
+ # Final hard-guard for OpenAI/Groq: ensure unsupported field is not present
160
257
  try:
161
- if "openai" in url.lower() and "stop_after_tool_calls" in processed_request:
258
+ low_url = url.lower()
259
+ if ("openai" in low_url or "groq.com" in low_url or "/proxy/groq" in low_url) and "stop_after_tool_calls" in processed_request:
162
260
  processed_request.pop("stop_after_tool_calls", None)
163
- logger.info("Removed stop_after_tool_calls for OpenAI request")
261
+ logger.info("Removed stop_after_tool_calls for Groq/OpenAI request")
164
262
  # Groq-specific requirement: when using JSON mode, one of the messages must contain the word 'json'
165
- low_url = url.lower()
166
263
  if ("groq.com" in low_url or "/openai" in low_url) and isinstance(
167
264
  processed_request, dict
168
265
  ):
@@ -228,13 +325,54 @@ class OpenAIClient:
228
325
  f"Inference response status=200, content-type={content_type}, bytes={len(body_text)}"
229
326
  )
230
327
  if body_text:
231
- preview_len = min(800, len(body_text))
232
- logger.info(
233
- f"Inference response preview ({preview_len} bytes): {body_text[:preview_len]}"
234
- )
328
+ # Log raw output with generous preview to debug no-tool-call issues
329
+ preview_len = min(4000, len(body_text))
330
+ logger.info({
331
+ "llm.raw_response": True,
332
+ "bytes": len(body_text),
333
+ "preview": body_text[:preview_len],
334
+ })
235
335
 
236
336
  result = response.json()
237
337
  logger.info(f"Inference response parsed_type={type(result).__name__}")
338
+
339
+ # Normalize tool calls so downstream always sees a function tool call
340
+ try:
341
+ if isinstance(result, dict):
342
+ choices = result.get("choices")
343
+ if isinstance(choices, list) and choices:
344
+ msg = choices[0].get("message")
345
+ if isinstance(msg, dict):
346
+ # Prefer tool_calls; if missing but function_call is present, synthesize tool_calls
347
+ tc = msg.get("tool_calls")
348
+ fc = msg.get("function_call")
349
+ if (not isinstance(tc, list) or not tc) and isinstance(fc, dict):
350
+ name = fc.get("name") or "interact_many"
351
+ args = fc.get("arguments") or "{}"
352
+ msg["tool_calls"] = [
353
+ {
354
+ "id": "call_norm",
355
+ "type": "function",
356
+ "function": {"name": name, "arguments": args},
357
+ }
358
+ ]
359
+ # Encourage downstream to treat this as a tool call
360
+ if isinstance(choices[0], dict):
361
+ choices[0]["finish_reason"] = "tool_calls"
362
+ # Log tool call count for debugging
363
+ try:
364
+ tc2 = msg.get("tool_calls")
365
+ count = len(tc2) if isinstance(tc2, list) else 0
366
+ logger.info({
367
+ "llm.tool_calls": True,
368
+ "count": count,
369
+ "finish_reason": choices[0].get("finish_reason") if isinstance(choices[0], dict) else None,
370
+ })
371
+ except Exception:
372
+ pass
373
+ except Exception:
374
+ pass
375
+
238
376
  return result
239
377
 
240
378
  except httpx.TimeoutException:
@@ -340,40 +478,6 @@ class OpenAIClient:
340
478
  pass
341
479
  except Exception:
342
480
  pass
343
- # Gracefully degrade on 422 so rollouts can still produce a trajectory
344
- if status == 422:
345
- try:
346
- # Best-effort parse of error for diagnostics
347
- err = None
348
- try:
349
- err = e.response.json()
350
- except Exception:
351
- err = {"error": "unprocessable", "detail": (text or "")[:200]}
352
- logger.warning(
353
- {
354
- "inference_422_recovered": True,
355
- "detail": err,
356
- }
357
- )
358
- except Exception:
359
- pass
360
- # Return a minimal OpenAI-compatible response with no tool_calls/content
361
- import time as _t
362
-
363
- return {
364
- "id": f"cmpl-{int(_t.time())}",
365
- "object": "chat.completion",
366
- "created": int(_t.time()),
367
- "model": processed_request.get("model") or "unknown",
368
- "choices": [
369
- {
370
- "index": 0,
371
- "message": {"role": "assistant", "content": "", "tool_calls": []},
372
- "finish_reason": "stop",
373
- }
374
- ],
375
- "usage": {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0},
376
- }
377
481
  raise
378
482
  except Exception as e:
379
483
  logger.error(f"Unexpected error calling {url}: {e}")
@@ -399,7 +503,14 @@ class OpenAIClient:
399
503
 
400
504
  try:
401
505
  async with httpx.AsyncClient(timeout=timeout) as client:
402
- response = await client.get(url, headers=self.headers)
506
+ headers = self.headers.copy()
507
+ try:
508
+ parsed = httpx.URL(url)
509
+ if self._env_api_key and "/proxy/" in (parsed.path or ""):
510
+ headers.setdefault("X-API-Key", self._env_api_key)
511
+ except Exception:
512
+ pass
513
+ response = await client.get(url, headers=headers)
403
514
  response.raise_for_status()
404
515
  return response.json()
405
516
  except httpx.HTTPStatusError as e: