synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (299)
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/baseline/banking77_baseline.py +204 -0
  3. examples/baseline/crafter_baseline.py +407 -0
  4. examples/baseline/pokemon_red_baseline.py +326 -0
  5. examples/baseline/simple_baseline.py +56 -0
  6. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  7. examples/blog_posts/gepa/README.md +355 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  9. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  10. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  13. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  15. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  16. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  18. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  19. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  20. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  21. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  22. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  23. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  24. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  25. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  26. examples/blog_posts/gepa/task_apps.py +105 -0
  27. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  28. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  29. examples/blog_posts/pokemon_vl/README.md +98 -0
  30. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  31. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  32. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  33. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  34. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  35. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  36. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  37. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  38. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  39. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  40. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  41. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  42. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  43. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  44. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  45. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  46. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  47. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  48. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  49. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  50. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  51. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  52. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  53. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  54. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  55. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  56. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  57. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  58. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  59. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  60. examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
  61. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  62. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
  63. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
  64. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  65. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  66. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  67. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  68. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  69. examples/qwen_vl/README.md +10 -12
  70. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  71. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  72. examples/qwen_vl/collect_data_via_cli.md +76 -84
  73. examples/qwen_vl/collect_vision_traces.py +4 -4
  74. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  75. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  76. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  77. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  78. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  79. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  80. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  81. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  82. examples/qwen_vl/run_vision_comparison.sh +6 -7
  83. examples/rl/README.md +5 -5
  84. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  85. examples/rl/configs/rl_from_base_qwen17.toml +6 -2
  86. examples/rl/task_app/README.md +1 -2
  87. examples/rl/task_app/math_single_step.py +2 -2
  88. examples/run_crafter_demo.sh +2 -2
  89. examples/sft/README.md +1 -1
  90. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  91. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  92. examples/swe/task_app/README.md +32 -2
  93. examples/swe/task_app/grpo_swe_mini.py +4 -0
  94. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  95. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  96. examples/swe/task_app/hosted/inference/openai_client.py +4 -38
  97. examples/swe/task_app/hosted/policy_routes.py +17 -0
  98. examples/swe/task_app/hosted/rollout.py +4 -2
  99. examples/swe/task_app/morph_backend.py +178 -0
  100. examples/task_apps/banking77/__init__.py +6 -0
  101. examples/task_apps/banking77/banking77_task_app.py +841 -0
  102. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  103. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  104. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  105. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  106. examples/task_apps/crafter/task_app/README.md +1 -1
  107. examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
  108. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  109. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  110. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  111. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  112. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
  113. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
  114. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
  115. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  116. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  117. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  118. examples/task_apps/gepa_benchmarks/common.py +260 -0
  119. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  120. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  121. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  122. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  123. examples/task_apps/math/README.md +1 -2
  124. examples/task_apps/pokemon_red/README.md +3 -4
  125. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  126. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  127. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  128. examples/task_apps/pokemon_red/task_app.py +288 -39
  129. examples/task_apps/sokoban/README.md +2 -3
  130. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  131. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  132. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  133. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  134. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  135. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
  136. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  137. examples/warming_up_to_rl/task_app/README.md +1 -1
  138. examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
  139. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  147. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  148. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
  149. synth_ai/api/train/builders.py +99 -4
  150. synth_ai/api/train/cli.py +516 -26
  151. synth_ai/api/train/config_finder.py +13 -2
  152. synth_ai/api/train/configs/__init__.py +23 -2
  153. synth_ai/api/train/configs/prompt_learning.py +442 -0
  154. synth_ai/api/train/configs/rl.py +61 -7
  155. synth_ai/api/train/configs/sft.py +6 -2
  156. synth_ai/api/train/configs/shared.py +59 -2
  157. synth_ai/api/train/task_app.py +1 -1
  158. synth_ai/api/train/validators.py +277 -0
  159. synth_ai/auth/credentials.py +119 -0
  160. synth_ai/baseline/__init__.py +25 -0
  161. synth_ai/baseline/config.py +209 -0
  162. synth_ai/baseline/discovery.py +214 -0
  163. synth_ai/baseline/execution.py +146 -0
  164. synth_ai/cli/__init__.py +94 -18
  165. synth_ai/cli/__main__.py +0 -0
  166. synth_ai/cli/claude.py +70 -0
  167. synth_ai/cli/codex.py +84 -0
  168. synth_ai/cli/commands/__init__.py +18 -0
  169. synth_ai/cli/commands/baseline/__init__.py +12 -0
  170. synth_ai/cli/commands/baseline/core.py +637 -0
  171. synth_ai/cli/commands/baseline/list.py +93 -0
  172. synth_ai/cli/commands/demo/__init__.py +6 -0
  173. synth_ai/cli/commands/demo/core.py +163 -0
  174. synth_ai/cli/commands/eval/__init__.py +19 -0
  175. synth_ai/cli/commands/eval/core.py +1112 -0
  176. synth_ai/cli/commands/eval/errors.py +81 -0
  177. synth_ai/cli/commands/eval/validation.py +133 -0
  178. synth_ai/cli/commands/filter/__init__.py +12 -0
  179. synth_ai/cli/commands/filter/core.py +424 -0
  180. synth_ai/cli/commands/filter/errors.py +55 -0
  181. synth_ai/cli/commands/filter/validation.py +77 -0
  182. synth_ai/cli/commands/help/__init__.py +177 -0
  183. synth_ai/cli/commands/help/core.py +72 -0
  184. synth_ai/cli/commands/smoke/__init__.py +7 -0
  185. synth_ai/cli/commands/smoke/core.py +1436 -0
  186. synth_ai/cli/commands/status/__init__.py +64 -0
  187. synth_ai/cli/commands/status/client.py +192 -0
  188. synth_ai/cli/commands/status/config.py +92 -0
  189. synth_ai/cli/commands/status/errors.py +20 -0
  190. synth_ai/cli/commands/status/formatters.py +164 -0
  191. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  192. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  193. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  194. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  195. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  196. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  197. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  198. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  199. synth_ai/cli/commands/status/utils.py +114 -0
  200. synth_ai/cli/commands/train/__init__.py +53 -0
  201. synth_ai/cli/commands/train/core.py +21 -0
  202. synth_ai/cli/commands/train/errors.py +117 -0
  203. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  204. synth_ai/cli/commands/train/judge_validation.py +305 -0
  205. synth_ai/cli/commands/train/validation.py +386 -0
  206. synth_ai/cli/demo.py +30 -158
  207. synth_ai/cli/deploy/__init__.py +43 -0
  208. synth_ai/cli/deploy.py +162 -0
  209. synth_ai/cli/eval/__init__.py +36 -0
  210. synth_ai/cli/eval/core.py +5 -0
  211. synth_ai/cli/eval/errors.py +31 -0
  212. synth_ai/cli/eval/validation.py +5 -0
  213. synth_ai/cli/filter/__init__.py +28 -0
  214. synth_ai/cli/filter/core.py +5 -0
  215. synth_ai/cli/filter/errors.py +23 -0
  216. synth_ai/cli/filter/validation.py +5 -0
  217. synth_ai/cli/legacy_root_backup.py +14 -8
  218. synth_ai/cli/modal_serve/__init__.py +12 -0
  219. synth_ai/cli/modal_serve/core.py +14 -0
  220. synth_ai/cli/modal_serve/errors.py +8 -0
  221. synth_ai/cli/modal_serve/validation.py +11 -0
  222. synth_ai/cli/opencode.py +107 -0
  223. synth_ai/cli/root.py +9 -5
  224. synth_ai/cli/serve/__init__.py +12 -0
  225. synth_ai/cli/serve/core.py +14 -0
  226. synth_ai/cli/serve/errors.py +8 -0
  227. synth_ai/cli/serve/validation.py +11 -0
  228. synth_ai/cli/setup.py +20 -265
  229. synth_ai/cli/status.py +7 -126
  230. synth_ai/cli/task_app_deploy.py +1 -10
  231. synth_ai/cli/task_app_modal_serve.py +4 -9
  232. synth_ai/cli/task_app_serve.py +4 -11
  233. synth_ai/cli/task_apps.py +51 -1480
  234. synth_ai/cli/train/__init__.py +12 -0
  235. synth_ai/cli/train/core.py +21 -0
  236. synth_ai/cli/train/errors.py +8 -0
  237. synth_ai/cli/train/validation.py +24 -0
  238. synth_ai/cli/train.py +1 -14
  239. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  240. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  241. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  242. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  243. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  244. synth_ai/environments/examples/red/engine.py +33 -12
  245. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  246. synth_ai/environments/examples/red/environment.py +26 -0
  247. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  248. synth_ai/http.py +12 -0
  249. synth_ai/judge_schemas.py +10 -10
  250. synth_ai/learning/__init__.py +10 -0
  251. synth_ai/learning/prompt_learning_client.py +276 -0
  252. synth_ai/learning/prompt_learning_types.py +184 -0
  253. synth_ai/learning/rl/client.py +3 -1
  254. synth_ai/pricing/__init__.py +2 -0
  255. synth_ai/pricing/model_pricing.py +57 -0
  256. synth_ai/streaming/__init__.py +29 -0
  257. synth_ai/streaming/config.py +94 -0
  258. synth_ai/streaming/handlers.py +518 -0
  259. synth_ai/streaming/streamer.py +320 -0
  260. synth_ai/streaming/types.py +95 -0
  261. synth_ai/task/apps/__init__.py +1 -0
  262. synth_ai/task/config.py +2 -0
  263. synth_ai/task/tracing_utils.py +25 -25
  264. synth_ai/task/validators.py +45 -9
  265. synth_ai/task_app_cfgs.py +21 -0
  266. synth_ai/tracing_v3/config.py +162 -19
  267. synth_ai/tracing_v3/constants.py +1 -1
  268. synth_ai/tracing_v3/db_config.py +24 -38
  269. synth_ai/tracing_v3/migration_helper.py +1 -2
  270. synth_ai/tracing_v3/storage/config.py +47 -13
  271. synth_ai/tracing_v3/storage/factory.py +3 -3
  272. synth_ai/tracing_v3/turso/daemon.py +113 -11
  273. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  274. synth_ai/types.py +8 -0
  275. synth_ai/urls.py +11 -0
  276. synth_ai/utils/__init__.py +30 -1
  277. synth_ai/utils/agents.py +74 -0
  278. synth_ai/utils/bin.py +39 -0
  279. synth_ai/utils/cli.py +149 -5
  280. synth_ai/utils/env.py +40 -33
  281. synth_ai/utils/http.py +4 -1
  282. synth_ai/utils/json.py +72 -0
  283. synth_ai/utils/modal.py +285 -3
  284. synth_ai/utils/paths.py +48 -0
  285. synth_ai/utils/uvicorn.py +113 -0
  286. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
  287. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
  288. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  289. synth_ai/cli/tui.py +0 -62
  290. synth_ai/tui/__init__.py +0 -5
  291. synth_ai/tui/__main__.py +0 -13
  292. synth_ai/tui/cli/__init__.py +0 -1
  293. synth_ai/tui/cli/query_experiments.py +0 -164
  294. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  295. synth_ai/tui/dashboard.py +0 -911
  296. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  297. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  298. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  299. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
@@ -4,6 +4,7 @@ import contextlib
4
4
  import logging
5
5
  import os
6
6
  from datetime import datetime
7
+ import asyncio
7
8
  from typing import Any
8
9
 
9
10
  from fastapi import APIRouter, HTTPException, Request
@@ -35,6 +36,13 @@ logger = logging.getLogger(__name__)
35
36
 
36
37
  router = APIRouter()
37
38
 
39
+ # Global concurrency limit for outbound inference to avoid backend overload/timeouts
40
+ try:
41
+ _INFERENCE_CONCURRENCY = int(os.getenv("INFERENCE_CONCURRENCY", "2") or "2")
42
+ except Exception: # pragma: no cover
43
+ _INFERENCE_CONCURRENCY = 2
44
+ _inference_sem = asyncio.Semaphore(max(1, _INFERENCE_CONCURRENCY))
45
+
38
46
 
39
47
  class PolicyCreateRequest(BaseModel):
40
48
  policy_name: str
@@ -250,6 +258,11 @@ async def step_policy(
250
258
  task_app = req.app.state.task_app
251
259
  policy = handle.policy
252
260
  tracing_context = getattr(req.state, "rollout_tracing", None)
261
+ if tracing_context is None:
262
+ print(
263
+ f"[TRACE_DEBUG] Missing tracing context on policy step; policy_id={request.policy_id}",
264
+ flush=True,
265
+ )
253
266
 
254
267
  obs_text = request.observation
255
268
  if isinstance(request.observation, dict):
@@ -462,6 +475,8 @@ async def step_policy(
462
475
  )
463
476
 
464
477
  # Emit full system/user prompts for observability (no secrets included)
478
+ system_prompt_records: list[dict[str, Any]] = []
479
+ user_prompt_records: list[dict[str, Any]] = []
465
480
  try:
466
481
 
467
482
  def _as_text(content: object) -> str:
@@ -481,8 +496,6 @@ async def step_policy(
481
496
  return "".join(parts)
482
497
  return str(content)
483
498
 
484
- system_prompt_records: list[dict[str, Any]] = []
485
- user_prompt_records: list[dict[str, Any]] = []
486
499
  for message in msgs:
487
500
  role = message.get("role")
488
501
  raw_content = message.get("content")
@@ -525,6 +538,11 @@ async def step_policy(
525
538
 
526
539
  if tracing_context is not None:
527
540
  try:
541
+ logger.info(
542
+ "[TRACE_DEBUG] record_policy_prompts sys=%s user=%s",
543
+ len(system_prompt_records),
544
+ len(user_prompt_records),
545
+ )
528
546
  await tracing_context.record_policy_prompts(
529
547
  system_prompt_records, user_prompt_records
530
548
  )
@@ -541,6 +559,14 @@ async def step_policy(
541
559
 
542
560
  # Ensure meta carries the final target URL for downstream logging/clients
543
561
  with contextlib.suppress(Exception):
562
+ # Bulletproof normalizer at the call site (in addition to client-side)
563
+ try:
564
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.utils import (
565
+ force_normalize_chat_completions_url,
566
+ )
567
+ target_url = force_normalize_chat_completions_url(target_url)
568
+ except Exception:
569
+ pass
544
570
  sanitized_target = ensure_chat_completions_url(target_url)
545
571
  if sanitized_target and sanitized_target != target_url:
546
572
  logger.warning(
@@ -589,6 +615,28 @@ async def step_policy(
589
615
  except Exception:
590
616
  api_key_override = None
591
617
 
618
+ # Fallback: If target is OpenAI but OPENAI_API_KEY is missing, route to Synth API
619
+ try:
620
+ import os as _os2
621
+ _low = str(target_url or "").lower()
622
+ if ("api.openai.com" in _low) and not (_os2.getenv("OPENAI_API_KEY")):
623
+ # Prefer task_app.synth_base_url if available; else default
624
+ synth_base = getattr(task_app, "synth_base_url", None)
625
+ if isinstance(synth_base, str) and synth_base.strip():
626
+ base = synth_base.rstrip("/")
627
+ fallback = base + "/inference/v1/chat/completions"
628
+ else:
629
+ fallback = "https://api.synth.run/api/inference/v1/chat/completions"
630
+ fixed = ensure_chat_completions_url(fallback)
631
+ logger.warning(
632
+ "POLICY_STEP: OPENAI key missing; falling back to Synth route %s",
633
+ fixed,
634
+ )
635
+ meta["inference_url"] = fixed
636
+ target_url = fixed
637
+ except Exception:
638
+ pass
639
+
592
640
  if api_key_override:
593
641
  try:
594
642
  masked = f"{api_key_override[:6]}…{api_key_override[-4:]}"
@@ -780,9 +828,10 @@ async def step_policy(
780
828
  "sokoban-react",
781
829
  "crafter-react",
782
830
  ) and getattr(policy, "use_tools", True):
783
- req_tools = meta["inference_request"]["tools"]
784
- req_tool_choice = meta["inference_request"]["tool_choice"]
785
- req_stop_after = meta["inference_request"]["stop_after_tool_calls"]
831
+ inf_req = meta.get("inference_request", {})
832
+ req_tools = inf_req.get("tools")
833
+ req_tool_choice = inf_req.get("tool_choice")
834
+ req_stop_after = inf_req.get("stop_after_tool_calls")
786
835
  logger.info(
787
836
  f"TOOLCALL_CONFIG: policy={policy_name} tools_present={bool(req_tools)} tool_choice={req_tool_choice} stop_after={req_stop_after}"
788
837
  )
@@ -791,6 +840,8 @@ async def step_policy(
791
840
  status_code=500,
792
841
  detail=f"TOOLCALL_ASSERTION_FAIL: Missing tools or tool_choice!=required for policy {policy_name}",
793
842
  )
843
+ if req_stop_after is None:
844
+ inf_req["stop_after_tool_calls"] = 1
794
845
 
795
846
  # Call inference service with retries for Flash cold-start (503)
796
847
  import time as _t
@@ -967,13 +1018,14 @@ async def step_policy(
967
1018
 
968
1019
  _t_start = _t.time()
969
1020
  call_started_at = datetime.utcnow()
970
- inference_response = await client.generate_with_retries(
971
- request=meta["inference_request"],
972
- base_url=meta["inference_url"],
973
- max_retries=12,
974
- backoff_factor=2.0,
975
- extra_headers=extra_headers,
976
- )
1021
+ async with _inference_sem:
1022
+ inference_response = await client.generate_with_retries(
1023
+ request=meta["inference_request"],
1024
+ base_url=meta["inference_url"],
1025
+ max_retries=12,
1026
+ backoff_factor=2.0,
1027
+ extra_headers=extra_headers,
1028
+ )
977
1029
  meta["inference_ms"] = int((_t.time() - _t_start) * 1000)
978
1030
  call_completed_at = datetime.utcnow()
979
1031
 
@@ -1053,6 +1105,23 @@ async def step_policy(
1053
1105
  except Exception as exc:
1054
1106
  logger.debug(f"TRACING_LLM_FAIL: {exc}")
1055
1107
 
1108
+ if not tool_calls:
1109
+ preview = ""
1110
+ try:
1111
+ preview = str(meta.get("raw_response") or "")[:400]
1112
+ except Exception:
1113
+ preview = "<unavailable>"
1114
+ logger.error(
1115
+ {
1116
+ "rollout.policy_step": True,
1117
+ "policy_id": request.policy_id,
1118
+ "error": "no_tool_calls",
1119
+ "inference_url": meta.get("inference_url"),
1120
+ "raw_preview": preview,
1121
+ }
1122
+ )
1123
+ raise RuntimeError("Policy step produced no tool calls; inference response unusable.")
1124
+
1056
1125
  return PolicyStepResponse(
1057
1126
  tool_calls=tool_calls,
1058
1127
  meta=meta,
@@ -491,6 +491,10 @@ class RolloutTracingContext:
491
491
  getattr(request.record, "trace_format", "compact") or "compact"
492
492
  ).lower()
493
493
  self.return_trace = bool(getattr(request.record, "return_trace", False))
494
+ print(
495
+ f"[TRACE_DEBUG] RolloutTracingContext init: trace_format={self.trace_format} return_trace={self.return_trace}",
496
+ flush=True,
497
+ )
494
498
  self.sft_output_dir = getattr(fastapi_request.app.state, "sft_output_dir", None)
495
499
  self.session_trace = None
496
500
  self.metadata_updates: dict[str, Any] = {}
@@ -513,19 +517,24 @@ class RolloutTracingContext:
513
517
 
514
518
  async def start_session(self) -> None:
515
519
  if not self.enabled or self.tracer is None:
520
+ print("[TRACE_DEBUG] start_session skipped: tracer disabled", flush=True)
516
521
  return
517
522
  try:
518
523
  await self.tracer.initialize()
524
+ print("[TRACE_DEBUG] tracer initialized", flush=True)
519
525
  except Exception as exc:
520
526
  logger.debug("TRACING_INIT_FAIL: %s", exc)
527
+ # Hard fail: tracing requested but cannot initialize
528
+ raise
521
529
  try:
522
530
  await self.tracer.start_session(
523
531
  session_id=self.run_id, metadata=dict(self.metadata_base)
524
532
  )
533
+ print(f"[TRACE_DEBUG] start_session succeeded for run_id={self.run_id}", flush=True)
525
534
  except Exception as exc:
526
535
  logger.info("TRACING_START_FAIL: %s", exc)
527
- self.enabled = False
528
- self.tracer = None
536
+ # Hard fail: tracing requested but cannot start session
537
+ raise
529
538
 
530
539
  async def start_decision(self, turn_number: int) -> None:
531
540
  self.current_turn = turn_number
@@ -590,7 +599,7 @@ class RolloutTracingContext:
590
599
  # Debug: Check message count
591
600
  if self.tracer and self.tracer._current_trace:
592
601
  msg_count = len(self.tracer._current_trace.markov_blanket_message_history)
593
- logger.info(f"[TRACE_DEBUG] After record_policy_prompts: {msg_count} messages in trace")
602
+ print(f"[TRACE_DEBUG] After record_policy_prompts: {msg_count} messages", flush=True)
594
603
 
595
604
  def _content_to_text(self, content: Any) -> str:
596
605
  if isinstance(content, str):
@@ -664,11 +673,20 @@ class RolloutTracingContext:
664
673
  return
665
674
  if self.enabled and self.tracer is not None:
666
675
  try:
676
+ payload = {
677
+ "role": "assistant",
678
+ "tool_calls": tool_calls,
679
+ }
667
680
  await self.tracer.record_message(
668
- content=self._safe_json(tool_calls),
669
- message_type="assistant", # Map to standard assistant message type
681
+ content=payload,
682
+ message_type="assistant",
670
683
  metadata={**self._message_metadata(), "is_tool_call": True},
671
684
  )
685
+ if self.tracer._current_trace:
686
+ print(
687
+ f"[TRACE_DEBUG] After tool invocation: messages={len(self.tracer._current_trace.markov_blanket_message_history)}",
688
+ flush=True,
689
+ )
672
690
  except Exception as exc:
673
691
  logger.debug("TRACING_TOOL_MSG_FAIL: %s", exc)
674
692
 
@@ -774,9 +792,33 @@ class RolloutTracingContext:
774
792
  }
775
793
  )
776
794
 
795
+ assistant_structured = assistant_content if assistant_content is not None else ""
796
+ assistant_text = self._content_to_text(assistant_content)
797
+
798
+ if self.enabled and self.tracer is not None:
799
+ assistant_payload: dict[str, Any] = {
800
+ "role": "assistant",
801
+ "content": assistant_structured,
802
+ "text": assistant_text,
803
+ }
804
+ if isinstance(assistant_message, dict):
805
+ if assistant_message.get("tool_calls"):
806
+ assistant_payload["tool_calls"] = assistant_message.get("tool_calls")
807
+ if assistant_message.get("reasoning"):
808
+ assistant_payload["reasoning"] = assistant_message.get("reasoning")
809
+ if assistant_message.get("thinking"):
810
+ assistant_payload["thinking"] = assistant_message.get("thinking")
811
+ try:
812
+ await self.tracer.record_message(
813
+ content=assistant_payload,
814
+ message_type="assistant",
815
+ metadata=self._message_metadata(),
816
+ )
817
+ except Exception as exc:
818
+ logger.debug("TRACING_ASSISTANT_MSG_FAIL: %s", exc)
819
+
777
820
  if self.sft_output_dir is not None:
778
821
  assistant_structured = assistant_content if assistant_content is not None else ""
779
- assistant_text = self._content_to_text(assistant_content)
780
822
  dialogue_structured: list[dict[str, Any]] = []
781
823
  for content in self.latest_system_prompt_content:
782
824
  if content is None:
@@ -941,17 +983,23 @@ class RolloutTracingContext:
941
983
  # Debug: Check message count before end_session
942
984
  if self.tracer._current_trace:
943
985
  msg_count = len(self.tracer._current_trace.markov_blanket_message_history)
944
- logger.info(f"[TRACE_DEBUG] Before end_session: {msg_count} messages in trace")
945
-
986
+ print(f"[TRACE_DEBUG] Before end_session: {msg_count} messages in trace", flush=True)
987
+
946
988
  self.session_trace = await self.tracer.end_session()
947
989
 
948
990
  # Debug: Check if session was saved
949
991
  if self.session_trace:
950
- logger.info(f"[TRACE_DEBUG] Session ended successfully, session_id={self.session_trace.session_id}")
992
+ print(
993
+ f"[TRACE_DEBUG] Session ended successfully, session_id={self.session_trace.session_id}",
994
+ flush=True,
995
+ )
951
996
  self.session_trace.metadata.update(self.metadata_updates)
952
- logger.info(f"[TRACE_DEBUG] session_trace.metadata keys: {list(self.session_trace.metadata.keys())}")
997
+ print(
998
+ f"[TRACE_DEBUG] session_trace.metadata keys: {list(self.session_trace.metadata.keys())}",
999
+ flush=True,
1000
+ )
953
1001
  else:
954
- logger.warning("[TRACE_DEBUG] end_session returned None!")
1002
+ print("[TRACE_DEBUG] end_session returned None!", flush=True)
955
1003
  except Exception as exc:
956
1004
  logger.warning(f"TRACING_END_SESSION_FAIL: {exc}", exc_info=True)
957
1005
  self.session_trace = None
@@ -991,6 +1039,10 @@ class RolloutTracingContext:
991
1039
  if self.trace_format in ("full", "structured"):
992
1040
  payload = session_trace.to_dict()
993
1041
  payload.setdefault("metadata", {}).update(self.metadata_updates)
1042
+ print(
1043
+ f"[TRACE_DEBUG] build_trace_payload returning structured trace with messages={len(payload.get('markov_blanket_message_history') or [])}",
1044
+ flush=True,
1045
+ )
994
1046
  return payload
995
1047
 
996
1048
  # For "compact" format, return only summary stats
@@ -1929,6 +1981,15 @@ async def execute_rollout(
1929
1981
  if 'policy_config_snapshot' not in locals():
1930
1982
  policy_config_snapshot = {}
1931
1983
 
1984
+ # Normalize inference URL for trajectory (and ensure no path in query)
1985
+ try:
1986
+ from .utils import force_normalize_chat_completions_url, ensure_chat_completions_url
1987
+ inference_url = force_normalize_chat_completions_url(inference_url)
1988
+ # apply mode-aware normalization too (keeps cid, appends path if missing)
1989
+ inference_url = ensure_chat_completions_url(inference_url, mode=request.mode)
1990
+ except Exception:
1991
+ pass
1992
+
1932
1993
  logger.info(
1933
1994
  "ROLLOUT_TRAJECTORY: run_id=%s policy_id=%s inference_url=%s trace_id=%s",
1934
1995
  request.run_id,
@@ -2043,6 +2104,16 @@ async def execute_rollout(
2043
2104
  if metrics.num_steps <= 0:
2044
2105
  raise HTTPException(status_code=500, detail="no_steps_executed: avg_turns == 0")
2045
2106
 
2107
+ # Ensure at least one tool call executed successfully
2108
+ tool_call_executed = any(
2109
+ isinstance(step.tool_calls, list) and len(step.tool_calls) > 0 for step in trajectory_steps
2110
+ )
2111
+ if not tool_call_executed:
2112
+ raise HTTPException(
2113
+ status_code=502,
2114
+ detail="no_tool_calls_executed: model failed to produce actionable tool calls.",
2115
+ )
2116
+
2046
2117
  response = RolloutResponse(
2047
2118
  run_id=request.run_id,
2048
2119
  trajectories=[trajectory],
@@ -11,6 +11,129 @@ logger = logging.getLogger(__name__)
11
11
  _CHAT_COMPLETIONS_SUFFIX = "/v1/chat/completions"
12
12
 
13
13
 
14
def force_normalize_chat_completions_url(raw_url: Any) -> Any:
    """
    Repair a malformed inference URL so it targets the chat-completions endpoint.

    Rules:
    - The final path ends with ``/v1/chat/completions``. A trailing slash on a
      path that already ends with the suffix is left untouched.
    - The query must not contain any '/' characters (no path segments in query).
    - If the query contains a path (e.g. ``?cid=.../v1/chat/completions``), that
      path is moved into the URL path and the remaining query params are kept.
      The recovered path REPLACES whatever path the URL already had.
    - The suffix is appended to a recovered path only when it does not already
      end with it, so ``?cid=x/proxy/v1/chat/completions`` is not given a
      second ``/v1/chat/completions``.
    - Scheme, host, port and the order of existing query params are preserved.

    Examples:
        https://host?cid=trace_123/v1/chat/completions
            -> https://host/v1/chat/completions?cid=trace_123
        https://host:8000?cid=trace_abc/v1/chat/completions&foo=bar
            -> https://host:8000/v1/chat/completions?cid=trace_abc&foo=bar
        https://host?cid=trace_123/v1/chat/completions?other=param
            -> https://host/v1/chat/completions?cid=trace_123&other=param

    Args:
        raw_url: Candidate inference URL.

    Returns:
        The normalized URL string. Non-string input and blank strings are
        returned unchanged.
    """
    if not isinstance(raw_url, str):
        return raw_url
    url = raw_url.strip()
    if not url:
        return raw_url

    parsed = urlparse(url)
    path = (parsed.path or "").rstrip("/")
    query = parsed.query or ""

    # A '/' inside the query means a path was glued onto the query string.
    if "/" in query:
        # Everything before the first '/' is the genuine query.
        before_slash, _, after_slash = query.partition("/")

        # The misplaced path runs until the first '&' or (malformed) '?'.
        separators = [
            pos for pos in (after_slash.find("&"), after_slash.find("?")) if pos >= 0
        ]
        cut_idx = min(separators, default=len(after_slash))
        path_from_query = "/" + after_slash[:cut_idx]  # restore leading '/'
        extra_query = after_slash[cut_idx + 1:]

        merged_query = "&".join(part for part in (before_slash, extra_query) if part)

        # Strip trailing slashes and test the END of the recovered path, so a
        # prefixed or slash-terminated path is not given the suffix twice.
        final_path = path_from_query.rstrip("/")
        if not final_path.endswith(_CHAT_COMPLETIONS_SUFFIX):
            final_path = f"{final_path}{_CHAT_COMPLETIONS_SUFFIX}"

        parsed = parsed._replace(path=final_path, query=merged_query)
        url = urlunparse(parsed)
        parsed = urlparse(url)
        path = parsed.path or ""
        query = parsed.query or ""

    # Ensure the path ends with the chat-completions suffix.
    if not path.endswith(_CHAT_COMPLETIONS_SUFFIX):
        parsed = parsed._replace(path=f"{path}{_CHAT_COMPLETIONS_SUFFIX}")
        url = urlunparse(parsed)
        parsed = urlparse(url)
        query = parsed.query or ""

    # Last resort: if a '/' survived in the query, drop it and all that follows.
    if "/" in query:
        parsed = parsed._replace(query=query.split("/", 1)[0])
        url = urlunparse(parsed)

    return url
89
+
90
+
91
def _validate_url_structure(url: str, context: str = "") -> None:
    """
    Validate that a URL has correct structure (path before query, not vice versa).

    Non-string or blank input is ignored. A URL that ``urlparse`` cannot parse
    is logged as a warning and otherwise ignored; only a successfully parsed
    URL whose query string contains a '/' is rejected.

    Args:
        url: The URL to validate.
        context: Optional context for error messages.

    Raises:
        ValueError: If the URL is malformed (path-like segments in query string).
    """
    if not isinstance(url, str) or not url.strip():
        return

    # Only the parse step is guarded: urlparse reports bad input (for example
    # an unbalanced IPv6 bracket) with ValueError, which must not be confused
    # with the deliberate ValueError raised further down.
    try:
        query = urlparse(url).query or ""
    except Exception as exc:  # best-effort: a parse failure is not a validation failure
        logger.warning(
            "[URL_VALIDATION] Failed to parse URL: %s (context: %s, error: %s)",
            url,
            context,
            exc,
        )
        return

    # CRITICAL CHECK: a '/' in the query means a path was placed after the '?'.
    if "/" not in query:
        return

    path_segment = query.split("/", 1)[1]
    error_msg = (
        f"FATAL [TASK_APP_URL_VALIDATION]: Malformed inference URL detected!\n"
        f"\n"
        f"URL: {url}\n"
        f"Context: {context}\n"
        f"\n"
        f"The URL has a path-like segment ('/{path_segment}') in the query string.\n"
        f"This indicates incorrect URL construction upstream.\n"
        f"\n"
        f"Expected: https://host/v1/chat/completions?cid=trace_123\n"
        f"Malformed: https://host?cid=trace_123/v1/chat/completions\n"
        f"\n"
        f"This should be caught by the trainer, but if you see this,\n"
        f"the trainer's URL validation may have failed.\n"
    )
    logger.error(error_msg)
    raise ValueError(error_msg)
135
+
136
+
14
137
  def ensure_chat_completions_url(raw_url: Any, mode: str | None = None) -> Any:
15
138
  """
16
139
  Ensure inference URLs point at the chat completions endpoint.
@@ -43,9 +166,75 @@ def ensure_chat_completions_url(raw_url: Any, mode: str | None = None) -> Any:
43
166
 
44
167
  parsed = urlparse(url)
45
168
  path = (parsed.path or "").rstrip("/")
169
+ query = parsed.query
170
+
171
+ logger.debug(
172
+ "ensure_chat_completions_url: parsing url=%s -> path=%r query=%r",
173
+ url,
174
+ path,
175
+ query,
176
+ )
177
+
178
+ # CRITICAL: Check for malformed URLs (path in query) and fix them FIRST
179
+ # Example: https://host?cid=trace_123/v1/chat/completions
180
+ # Should be: https://host/v1/chat/completions?cid=trace_123
181
+ if query and "/" in query:
182
+ logger.error(
183
+ f"[URL_FIX] Detected malformed URL in ensure_chat_completions_url: {url}\n"
184
+ f"Path-like segment found in query string. Attempting to fix..."
185
+ )
186
+ # Split query at first "/" to separate query params from path
187
+ query_parts = query.split("/", 1)
188
+ if len(query_parts) == 2:
189
+ # query_parts[0] is the actual query (e.g., "cid=trace_123")
190
+ # query_parts[1] is the path that was incorrectly put in query
191
+ actual_query = query_parts[0]
192
+ path_and_more = query_parts[1] # Could be "v1/chat/completions" or "v1/chat/completions&foo=bar"
193
+
194
+ # Extract the path part (everything before "&" or "?" if present)
195
+ # Handle both "&" (query param separator) and "?" (another malformed query separator)
196
+ if "&" in path_and_more:
197
+ # Path is followed by more query params (separated by &)
198
+ path_segment, extra_query = path_and_more.split("&", 1)
199
+ path_in_query = "/" + path_segment # Restore leading slash
200
+ # Merge extra query params with actual_query
201
+ actual_query = f"{actual_query}&{extra_query}"
202
+ elif "?" in path_and_more:
203
+ # Path is followed by more query params (separated by ?, which is malformed)
204
+ path_segment, extra_query = path_and_more.split("?", 1)
205
+ path_in_query = "/" + path_segment # Restore leading slash
206
+ # Merge extra query params with actual_query (use & as separator)
207
+ actual_query = f"{actual_query}&{extra_query}"
208
+ else:
209
+ # No extra query params, just the path
210
+ path_in_query = "/" + path_and_more # Restore leading slash
211
+
212
+ # If the path_in_query already contains /v1/chat/completions, use it
213
+ # Otherwise, append /v1/chat/completions
214
+ if path_in_query.startswith("/v1/chat/completions"):
215
+ final_path = path_in_query
216
+ else:
217
+ # Append /v1/chat/completions to whatever path we found
218
+ final_path = path_in_query.rstrip("/") + "/v1/chat/completions"
219
+
220
+ # Reconstruct URL correctly: path comes before query
221
+ parsed = parsed._replace(path=final_path, query=actual_query)
222
+ fixed_url = urlunparse(parsed)
223
+ logger.warning(f"[URL_FIX] Fixed malformed URL:\n FROM: {url}\n TO: {fixed_url}")
224
+ url = fixed_url
225
+ # Re-parse after fix
226
+ parsed = urlparse(url)
227
+ path = parsed.path.rstrip("/")
228
+ query = parsed.query
229
+ else:
230
+ # Can't parse - this shouldn't happen but validate will catch it
231
+ logger.error(f"[URL_FIX] Could not parse malformed query: {query}")
232
+ _validate_url_structure(url, context="ensure_chat_completions_url input - cannot fix")
233
+
46
234
  if path.endswith("/v1/chat/completions"):
47
235
  logger.debug("ensure_chat_completions_url: URL already normalized %s", url)
48
- # Already targeting the desired endpoint; keep original to preserve trailing slash.
236
+ # Validate final URL
237
+ _validate_url_structure(url, context="ensure_chat_completions_url output")
49
238
  return url
50
239
 
51
240
  if not path:
@@ -55,6 +244,10 @@ def ensure_chat_completions_url(raw_url: Any, mode: str | None = None) -> Any:
55
244
 
56
245
  rebuilt = parsed._replace(path=new_path)
57
246
  normalized = urlunparse(rebuilt)
247
+
248
+ # CRITICAL: Validate the normalized URL
249
+ _validate_url_structure(normalized, context="ensure_chat_completions_url output")
250
+
58
251
  logger.info(
59
252
  "ensure_chat_completions_url: RL mode - normalized inference URL from %s to %s",
60
253
  url,
@@ -2,7 +2,7 @@
2
2
 
3
3
  This mirrors the structure of the Crafter task app wrapper while delegating
4
4
  all configuration to the colocated `grpo_enron.py` module. Normal usage should
5
- prefer invoking `uvx synth-ai serve grpo-enron`, but this module remains for
5
+ prefer invoking `uvx synth-ai deploy --runtime uvicorn grpo-enron`, but this module remains for
6
6
  direct execution or importing the FastAPI app object.
7
7
  """
8
8
 
@@ -0,0 +1,7 @@
1
+ """GEPA benchmark task apps (HotpotQA, IFBench, HoVer, PUPA)."""
2
+
3
+ # Import modules for side effects (task app registration) when package is imported.
4
+ from . import hotpotqa_task_app # noqa: F401
5
+ from . import hover_task_app # noqa: F401
6
+ from . import ifbench_task_app # noqa: F401
7
+ from . import pupa_task_app # noqa: F401