synth-ai 0.2.8.dev4__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (889)
  1. examples/README.md +1 -0
  2. examples/__init__.py +16 -0
  3. examples/analyze_semantic_words.sh +17 -0
  4. examples/baseline/banking77_baseline.py +243 -0
  5. examples/baseline/banking77_pipeline_baseline.py +294 -0
  6. examples/baseline/crafter_baseline.py +407 -0
  7. examples/baseline/pokemon_red_baseline.py +326 -0
  8. examples/baseline/simple_baseline.py +56 -0
  9. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  10. examples/blog_posts/gepa/README.md +355 -0
  11. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  12. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
  13. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
  14. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
  15. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
  16. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
  17. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
  18. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
  19. examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
  20. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
  21. examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
  22. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
  23. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
  24. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
  25. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
  26. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
  27. examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
  28. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  29. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  30. examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
  31. examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
  32. examples/blog_posts/gepa/task_apps.py +105 -0
  33. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  34. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  35. examples/blog_posts/mipro/README.md +415 -0
  36. examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
  37. examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
  38. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
  39. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
  40. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
  41. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
  42. examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
  43. examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
  44. examples/blog_posts/mipro/multi_step.md +79 -0
  45. examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
  46. examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
  47. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
  48. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
  49. examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
  50. examples/blog_posts/pokemon_vl/README.md +98 -0
  51. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  52. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  53. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  54. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  55. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  56. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  57. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  58. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  59. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  60. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  61. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  62. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  63. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  64. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  65. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  66. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  67. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  68. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  69. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  70. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  71. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  72. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  73. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  74. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  75. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  76. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  77. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  78. examples/crafter_debug_render.py +186 -0
  79. examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
  80. examples/gepa/banking77_pipeline_gepa.toml +96 -0
  81. examples/gepa/multi_stage_gepa_example.toml +84 -0
  82. examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
  83. examples/multi_step/SFT_README.md +147 -0
  84. examples/multi_step/configs/README_verilog_rl.md +77 -0
  85. examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
  86. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
  87. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  88. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  89. examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
  90. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
  91. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
  92. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
  93. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  94. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  95. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  96. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  97. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  98. examples/multi_step/configs/verilog_rl_lora.toml +147 -0
  99. examples/multi_step/convert_traces_to_sft.py +84 -0
  100. examples/multi_step/crafter_rl_lora.md +70 -0
  101. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  102. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  103. examples/multi_step/readme.md +48 -0
  104. examples/multi_step/run_sft_qwen30b.sh +45 -0
  105. examples/multi_step/sse_metrics_streaming_notes.md +357 -0
  106. examples/multi_step/task_app_config_notes.md +494 -0
  107. examples/multi_step/verilog_rl_lora.md +218 -0
  108. examples/qwen_coder/README.md +102 -0
  109. examples/qwen_coder/_shared.py +113 -0
  110. examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
  111. examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
  112. examples/qwen_coder/configs/coder_lora_small.toml +57 -0
  113. examples/qwen_coder/generate_dataset.py +98 -0
  114. examples/qwen_coder/infer_ft_smoke.py +65 -0
  115. examples/qwen_coder/infer_prod_proxy.py +73 -0
  116. examples/qwen_coder/infer_via_synth.py +87 -0
  117. examples/qwen_coder/scripts/infer_coder.sh +19 -0
  118. examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
  119. examples/qwen_coder/sft_full_17b.py +103 -0
  120. examples/qwen_coder/sft_lora_30b.py +110 -0
  121. examples/qwen_coder/subset_jsonl.py +39 -0
  122. examples/qwen_coder/todos.md +38 -0
  123. examples/qwen_coder/validate_jsonl.py +60 -0
  124. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  125. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  126. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  127. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  128. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  129. examples/qwen_vl/QUICKSTART.md +327 -0
  130. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  131. examples/qwen_vl/README.md +152 -0
  132. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  133. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  134. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  135. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  136. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  137. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  138. examples/qwen_vl/__init__.py +2 -0
  139. examples/qwen_vl/collect_data_via_cli.md +415 -0
  140. examples/qwen_vl/collect_vision_traces.py +368 -0
  141. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  142. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  143. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  144. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  145. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  146. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  147. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  148. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  149. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  150. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  151. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  152. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  153. examples/qwen_vl/run_vision_comparison.sh +61 -0
  154. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  155. examples/qwen_vl/test_image_validation.py +201 -0
  156. examples/qwen_vl/test_sft_vision_data.py +110 -0
  157. examples/rl/README.md +169 -0
  158. examples/rl/configs/eval_base_qwen.toml +17 -0
  159. examples/rl/configs/eval_rl_qwen.toml +13 -0
  160. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  161. examples/rl/configs/rl_from_base_qwen17.toml +80 -0
  162. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  163. examples/rl/download_dataset.py +80 -0
  164. examples/rl/run_eval.py +436 -0
  165. examples/rl/run_rl_and_save.py +111 -0
  166. examples/rl/task_app/README.md +21 -0
  167. examples/rl/task_app/math_single_step.py +990 -0
  168. examples/rl/task_app/math_task_app.py +111 -0
  169. examples/run_crafter_demo.sh +10 -0
  170. examples/sdk_prompt_learning_example.py +55 -0
  171. examples/sft/README.md +139 -0
  172. examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
  173. examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
  174. examples/sft/evaluate.py +117 -0
  175. examples/sft/export_dataset.py +120 -0
  176. examples/sft/generate_traces.py +164 -0
  177. examples/swe/__init__.py +12 -0
  178. examples/swe/task_app/README.md +135 -0
  179. examples/swe/task_app/__init__.py +2 -0
  180. examples/swe/task_app/grpo_swe_mini.py +604 -0
  181. examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
  182. examples/swe/task_app/hosted/README.md +173 -0
  183. examples/swe/task_app/hosted/__init__.py +5 -0
  184. examples/swe/task_app/hosted/branching.py +143 -0
  185. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  186. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  187. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  188. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  189. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  190. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  191. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  192. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  193. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  194. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  195. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
  196. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  197. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  198. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  199. examples/swe/task_app/hosted/hosted_app.py +204 -0
  200. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  201. examples/swe/task_app/hosted/inference/openai_client.py +584 -0
  202. examples/swe/task_app/hosted/main.py +100 -0
  203. examples/swe/task_app/hosted/policy_routes.py +1094 -0
  204. examples/swe/task_app/hosted/registry.py +195 -0
  205. examples/swe/task_app/hosted/rollout.py +1905 -0
  206. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  207. examples/swe/task_app/hosted/storage/volume.py +211 -0
  208. examples/swe/task_app/hosted/test_agents.py +161 -0
  209. examples/swe/task_app/hosted/test_service.py +136 -0
  210. examples/swe/task_app/hosted/utils.py +62 -0
  211. examples/swe/task_app/morph_backend.py +178 -0
  212. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  213. examples/task_apps/TESTING.md +275 -0
  214. examples/task_apps/banking77/__init__.py +6 -0
  215. examples/task_apps/banking77/banking77_task_app.py +912 -0
  216. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  217. examples/task_apps/banking77_pipeline/__init__.py +6 -0
  218. examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
  219. examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
  220. examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
  221. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  222. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
  223. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
  224. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  225. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  226. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  227. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  228. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  229. examples/task_apps/crafter/task_app/README.md +42 -0
  230. examples/task_apps/crafter/task_app/__init__.py +5 -0
  231. examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
  232. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
  233. examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
  234. examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
  235. examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
  236. examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  237. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  238. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  239. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  240. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
  241. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
  242. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
  243. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  244. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  245. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
  246. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  247. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
  248. examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
  249. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
  250. examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
  251. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
  252. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  253. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
  254. examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
  255. examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
  256. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
  257. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  258. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  259. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  260. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  261. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  262. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  263. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  264. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  265. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  266. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  267. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  268. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  269. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  270. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  271. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  272. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  273. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  274. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  275. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  276. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  277. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  278. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  279. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  280. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  281. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  282. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  283. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  284. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  285. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  286. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  287. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  288. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  289. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  290. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  291. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  292. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  293. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  294. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  295. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  296. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  297. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  298. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  299. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  300. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  301. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  302. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  303. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  304. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  305. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  306. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  307. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  308. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  309. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  310. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  311. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  312. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  313. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  314. examples/task_apps/enron/__init__.py +2 -0
  315. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  316. examples/task_apps/enron/filter_sft.toml +5 -0
  317. examples/task_apps/enron/task_app/README.md +14 -0
  318. examples/task_apps/enron/task_app/__init__.py +1 -0
  319. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  320. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  321. examples/task_apps/enron/tests/__init__.py +4 -0
  322. examples/task_apps/enron/tests/conftest.py +115 -0
  323. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  324. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  325. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  326. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  327. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  328. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  329. examples/task_apps/gepa_benchmarks/common.py +260 -0
  330. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  331. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  332. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  333. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  334. examples/task_apps/math/README.md +21 -0
  335. examples/task_apps/math/math_single_step.py +1000 -0
  336. examples/task_apps/math/math_task_app.py +115 -0
  337. examples/task_apps/pokemon_battle/__init__.py +2 -0
  338. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  339. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  340. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  341. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  342. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  343. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  344. examples/task_apps/pokemon_red/README.md +356 -0
  345. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
  346. examples/task_apps/pokemon_red/__init__.py +3 -0
  347. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
  348. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
  349. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  350. examples/task_apps/pokemon_red/task_app.py +1048 -0
  351. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  352. examples/task_apps/sokoban/README.md +306 -0
  353. examples/task_apps/sokoban/__init__.py +3 -0
  354. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  355. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  356. examples/task_apps/sokoban/filter_sft.toml +5 -0
  357. examples/task_apps/sokoban/task_app.py +1058 -0
  358. examples/task_apps/sokoban/tests/__init__.py +4 -0
  359. examples/task_apps/sokoban/tests/conftest.py +113 -0
  360. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  361. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  362. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  363. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  364. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  365. examples/task_apps/verilog/__init__.py +1 -0
  366. examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
  367. examples/task_apps/verilog/filter_sft.toml +5 -0
  368. examples/task_apps/verilog/task_app/README.md +12 -0
  369. examples/task_apps/verilog/task_app/__init__.py +1 -0
  370. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  371. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  372. examples/task_apps/verilog/tests/__init__.py +4 -0
  373. examples/task_apps/verilog/tests/conftest.py +115 -0
  374. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  375. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  376. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  377. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  378. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  379. examples/tunnel_gepa_banking77/README.md +106 -0
  380. examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
  381. examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
  382. examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
  383. examples/vlm/PROPOSAL.md +53 -0
  384. examples/vlm/README.md +68 -0
  385. examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
  386. examples/vlm/crafter_image_only_agent.py +207 -0
  387. examples/vlm/crafter_openai_vlm_agent.py +275 -0
  388. examples/vlm/filter_image_rows.py +63 -0
  389. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  390. examples/warming_up_to_rl/_utils.py +92 -0
  391. examples/warming_up_to_rl/analyze_trace_db.py +422 -0
  392. examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
  393. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
  394. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
  395. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
  396. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
  397. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
  398. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
  399. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
  400. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
  401. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
  402. examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
  403. examples/warming_up_to_rl/export_trace_sft.py +837 -0
  404. examples/warming_up_to_rl/groq_test.py +97 -0
  405. examples/warming_up_to_rl/manage_secrets.py +131 -0
  406. examples/warming_up_to_rl/old/event_rewards.md +234 -0
  407. examples/warming_up_to_rl/old/notes.md +73 -0
  408. examples/warming_up_to_rl/readme.md +110 -0
  409. examples/warming_up_to_rl/run_eval.py +736 -0
  410. examples/warming_up_to_rl/run_fft_and_save.py +380 -0
  411. examples/warming_up_to_rl/run_local_rollout.py +239 -0
  412. examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
  413. examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
  414. examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
  415. examples/warming_up_to_rl/run_rl_and_save.py +124 -0
  416. examples/warming_up_to_rl/run_rollout_remote.py +156 -0
  417. examples/warming_up_to_rl/task_app/README.md +42 -0
  418. examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
  419. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  420. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  421. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  422. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  423. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  424. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  425. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  426. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  427. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  428. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  429. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  430. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  431. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  432. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
  433. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  434. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
  435. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  436. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
  437. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  438. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
  439. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  440. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  441. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  442. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  443. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
  444. examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
  445. examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
  446. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
  447. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
  448. examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
  449. examples/workflows/math_rl/download_dataset.py +80 -0
  450. examples/workflows/math_rl/run_eval.py +436 -0
  451. examples/workflows/math_rl/run_rl_and_save.py +111 -0
  452. synth_ai/__init__.py +47 -23
  453. synth_ai/_utils/__init__.py +47 -0
  454. synth_ai/_utils/base_url.py +10 -0
  455. synth_ai/_utils/http.py +10 -0
  456. synth_ai/_utils/prompts.py +10 -0
  457. synth_ai/_utils/task_app_state.py +12 -0
  458. synth_ai/_utils/user_config.py +10 -0
  459. synth_ai/api/models/supported.py +514 -0
  460. synth_ai/api/train/__init__.py +63 -0
  461. synth_ai/api/train/builders.py +473 -0
  462. synth_ai/api/train/cli.py +1185 -0
  463. synth_ai/api/train/config_finder.py +246 -0
  464. synth_ai/api/train/configs/__init__.py +65 -0
  465. synth_ai/api/train/configs/prompt_learning.py +496 -0
  466. synth_ai/api/train/configs/rl.py +188 -0
  467. synth_ai/api/train/configs/sft.py +99 -0
  468. synth_ai/api/train/configs/shared.py +81 -0
  469. synth_ai/api/train/env_resolver.py +352 -0
  470. synth_ai/api/train/pollers.py +91 -0
  471. synth_ai/api/train/prompt_learning.py +425 -0
  472. synth_ai/api/train/sft.py +390 -0
  473. synth_ai/api/train/supported_algos.py +147 -0
  474. synth_ai/api/train/task_app.py +195 -0
  475. synth_ai/api/train/utils.py +244 -0
  476. synth_ai/api/train/validators.py +1117 -0
  477. synth_ai/api/tunnel.py +49 -0
  478. synth_ai/auth/credentials.py +94 -0
  479. synth_ai/baseline/__init__.py +25 -0
  480. synth_ai/baseline/config.py +209 -0
  481. synth_ai/baseline/discovery.py +214 -0
  482. synth_ai/baseline/execution.py +146 -0
  483. synth_ai/cfgs.py +227 -0
  484. synth_ai/cli/__init__.py +90 -45
  485. synth_ai/cli/_modal_wrapper.py +31 -0
  486. synth_ai/cli/_storage.py +20 -0
  487. synth_ai/cli/_typer_patch.py +47 -0
  488. synth_ai/cli/_validate_task_app.py +29 -0
  489. synth_ai/cli/balance.py +16 -4
  490. synth_ai/cli/calc.py +36 -21
  491. synth_ai/cli/claude.py +70 -0
  492. synth_ai/cli/codex.py +267 -0
  493. synth_ai/cli/commands/__init__.py +18 -0
  494. synth_ai/cli/commands/baseline/__init__.py +12 -0
  495. synth_ai/cli/commands/baseline/core.py +637 -0
  496. synth_ai/cli/commands/baseline/list.py +93 -0
  497. synth_ai/cli/commands/demo/__init__.py +6 -0
  498. synth_ai/cli/commands/demo/core.py +163 -0
  499. synth_ai/cli/commands/eval/__init__.py +19 -0
  500. synth_ai/cli/commands/eval/core.py +1112 -0
  501. synth_ai/cli/commands/eval/errors.py +81 -0
  502. synth_ai/cli/commands/eval/validation.py +133 -0
  503. synth_ai/cli/commands/filter/__init__.py +12 -0
  504. synth_ai/cli/commands/filter/core.py +424 -0
  505. synth_ai/cli/commands/filter/errors.py +55 -0
  506. synth_ai/cli/commands/filter/validation.py +77 -0
  507. synth_ai/cli/commands/help/__init__.py +185 -0
  508. synth_ai/cli/commands/help/core.py +72 -0
  509. synth_ai/cli/commands/smoke/__init__.py +7 -0
  510. synth_ai/cli/commands/smoke/core.py +1437 -0
  511. synth_ai/cli/commands/status/__init__.py +66 -0
  512. synth_ai/cli/commands/status/client.py +192 -0
  513. synth_ai/cli/commands/status/config.py +92 -0
  514. synth_ai/cli/commands/status/errors.py +20 -0
  515. synth_ai/cli/commands/status/formatters.py +164 -0
  516. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  517. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  518. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  519. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  520. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  521. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  522. synth_ai/cli/commands/status/subcommands/session.py +183 -0
  523. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  524. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  525. synth_ai/cli/commands/status/utils.py +114 -0
  526. synth_ai/cli/commands/train/__init__.py +53 -0
  527. synth_ai/cli/commands/train/core.py +21 -0
  528. synth_ai/cli/commands/train/errors.py +117 -0
  529. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  530. synth_ai/cli/commands/train/judge_validation.py +305 -0
  531. synth_ai/cli/commands/train/validation.py +386 -0
  532. synth_ai/cli/demo.py +32 -140
  533. synth_ai/cli/deploy.py +233 -0
  534. synth_ai/cli/eval/__init__.py +36 -0
  535. synth_ai/cli/eval/core.py +5 -0
  536. synth_ai/cli/eval/errors.py +31 -0
  537. synth_ai/cli/eval/validation.py +5 -0
  538. synth_ai/cli/filter/__init__.py +28 -0
  539. synth_ai/cli/filter/core.py +5 -0
  540. synth_ai/cli/filter/errors.py +23 -0
  541. synth_ai/cli/filter/validation.py +5 -0
  542. synth_ai/cli/legacy_root_backup.py +28 -22
  543. synth_ai/cli/lib/__init__.py +10 -0
  544. synth_ai/cli/lib/task_app_discovery.py +7 -0
  545. synth_ai/cli/lib/task_app_env.py +518 -0
  546. synth_ai/cli/mcp.py +34 -0
  547. synth_ai/cli/modal_serve/__init__.py +12 -0
  548. synth_ai/cli/modal_serve/core.py +14 -0
  549. synth_ai/cli/modal_serve/errors.py +8 -0
  550. synth_ai/cli/modal_serve/validation.py +11 -0
  551. synth_ai/cli/opencode.py +256 -0
  552. synth_ai/cli/recent.py +13 -7
  553. synth_ai/cli/rl_demo.py +166 -114
  554. synth_ai/cli/root.py +143 -112
  555. synth_ai/cli/serve/__init__.py +12 -0
  556. synth_ai/cli/serve/core.py +14 -0
  557. synth_ai/cli/serve/errors.py +8 -0
  558. synth_ai/cli/serve/validation.py +11 -0
  559. synth_ai/cli/setup.py +49 -0
  560. synth_ai/cli/status.py +7 -125
  561. synth_ai/cli/task_app_deploy.py +7 -0
  562. synth_ai/cli/task_app_list.py +25 -0
  563. synth_ai/cli/task_app_modal_serve.py +11 -0
  564. synth_ai/cli/task_app_serve.py +11 -0
  565. synth_ai/cli/task_apps.py +3134 -0
  566. synth_ai/cli/traces.py +9 -5
  567. synth_ai/cli/train/__init__.py +12 -0
  568. synth_ai/cli/train/core.py +21 -0
  569. synth_ai/cli/train/errors.py +8 -0
  570. synth_ai/cli/train/validation.py +24 -0
  571. synth_ai/cli/train.py +5 -0
  572. synth_ai/cli/turso.py +73 -0
  573. synth_ai/cli/watch.py +13 -18
  574. synth_ai/demos/__init__.py +10 -0
  575. synth_ai/demos/core/__init__.py +28 -1
  576. synth_ai/demos/core/cli.py +745 -416
  577. synth_ai/demos/crafter/__init__.py +1 -0
  578. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  579. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  580. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  581. synth_ai/demos/demo_registry.py +176 -0
  582. synth_ai/demos/demo_task_apps/__init__.py +7 -1
  583. synth_ai/demos/demo_task_apps/core.py +75 -37
  584. synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
  585. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
  586. synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
  587. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
  588. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  589. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  590. synth_ai/demos/demo_task_apps/math/config.toml +55 -110
  591. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  592. synth_ai/demos/demo_task_apps/math/modal_task_app.py +491 -166
  593. synth_ai/demos/demo_task_apps/math/task_app_entry.py +37 -0
  594. synth_ai/demos/math/__init__.py +1 -0
  595. synth_ai/demos/math/_common.py +16 -0
  596. synth_ai/demos/math/app.py +38 -0
  597. synth_ai/demos/math/config.toml +76 -0
  598. synth_ai/demos/math/deploy_modal.py +54 -0
  599. synth_ai/demos/math/modal_task_app.py +703 -0
  600. synth_ai/demos/math/task_app_entry.py +51 -0
  601. synth_ai/environments/environment/core.py +7 -1
  602. synth_ai/environments/examples/bandit/engine.py +12 -5
  603. synth_ai/environments/examples/bandit/environment.py +0 -1
  604. synth_ai/environments/examples/bandit/taskset.py +4 -4
  605. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  606. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  607. synth_ai/environments/examples/crafter_classic/environment.py +93 -2
  608. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  609. synth_ai/environments/examples/enron/engine.py +7 -2
  610. synth_ai/environments/examples/enron/environment.py +68 -0
  611. synth_ai/environments/examples/red/engine.py +60 -12
  612. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  613. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  614. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  615. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  616. synth_ai/environments/examples/red/environment.py +86 -0
  617. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  618. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  619. synth_ai/environments/examples/verilog/engine.py +104 -12
  620. synth_ai/environments/examples/wordle/environment.py +0 -1
  621. synth_ai/environments/reproducibility/tree.py +5 -6
  622. synth_ai/environments/service/app.py +11 -12
  623. synth_ai/environments/service/core_routes.py +10 -9
  624. synth_ai/environments/stateful/engine.py +1 -1
  625. synth_ai/environments/tasks/core.py +1 -0
  626. synth_ai/environments/tasks/filters.py +5 -6
  627. synth_ai/environments/tasks/utils.py +4 -5
  628. synth_ai/evals/__init__.py +15 -0
  629. synth_ai/evals/base.py +14 -5
  630. synth_ai/evals/client.py +82 -0
  631. synth_ai/evals/types.py +42 -0
  632. synth_ai/http.py +8 -22
  633. synth_ai/http_client.py +45 -12
  634. synth_ai/inference/__init__.py +0 -2
  635. synth_ai/inference/client.py +21 -7
  636. synth_ai/jobs/client.py +129 -80
  637. synth_ai/judge_schemas.py +127 -0
  638. synth_ai/learning/__init__.py +51 -6
  639. synth_ai/learning/algorithms.py +14 -0
  640. synth_ai/learning/client.py +122 -30
  641. synth_ai/learning/config.py +2 -40
  642. synth_ai/learning/constants.py +0 -2
  643. synth_ai/learning/ft_client.py +4 -56
  644. synth_ai/learning/health.py +14 -8
  645. synth_ai/learning/jobs.py +43 -47
  646. synth_ai/learning/prompt_learning_client.py +276 -0
  647. synth_ai/learning/prompt_learning_types.py +185 -0
  648. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  649. synth_ai/learning/rl/client.py +269 -0
  650. synth_ai/learning/rl/config.py +31 -0
  651. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  652. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  653. synth_ai/learning/rl/secrets.py +13 -0
  654. synth_ai/learning/rl_client.py +2 -253
  655. synth_ai/learning/sft/__init__.py +29 -0
  656. synth_ai/learning/sft/client.py +68 -0
  657. synth_ai/learning/sft/config.py +270 -0
  658. synth_ai/learning/sft/data.py +698 -0
  659. synth_ai/learning/sse.py +25 -26
  660. synth_ai/learning/validators.py +29 -25
  661. synth_ai/mcp/__init__.py +5 -0
  662. synth_ai/mcp/__main__.py +8 -0
  663. synth_ai/mcp/main.py +254 -0
  664. synth_ai/mcp/setup.py +100 -0
  665. synth_ai/modal.py +257 -0
  666. synth_ai/pricing/__init__.py +3 -0
  667. synth_ai/pricing/model_pricing.py +64 -0
  668. synth_ai/session/__init__.py +75 -0
  669. synth_ai/session/client.py +383 -0
  670. synth_ai/session/constants.py +63 -0
  671. synth_ai/session/exceptions.py +105 -0
  672. synth_ai/session/manager.py +139 -0
  673. synth_ai/session/models.py +89 -0
  674. synth_ai/session/query.py +110 -0
  675. synth_ai/spec/__init__.py +46 -0
  676. synth_ai/spec/dataclasses.py +149 -0
  677. synth_ai/spec/loader.py +144 -0
  678. synth_ai/spec/serializer.py +199 -0
  679. synth_ai/spec/validation.py +250 -0
  680. synth_ai/streaming/__init__.py +29 -0
  681. synth_ai/streaming/config.py +94 -0
  682. synth_ai/streaming/handlers.py +589 -0
  683. synth_ai/streaming/streamer.py +320 -0
  684. synth_ai/streaming/types.py +95 -0
  685. synth_ai/task/__init__.py +116 -3
  686. synth_ai/task/apps/__init__.py +132 -0
  687. synth_ai/task/auth.py +165 -0
  688. synth_ai/task/client.py +167 -0
  689. synth_ai/task/config.py +261 -0
  690. synth_ai/task/contracts.py +173 -57
  691. synth_ai/task/datasets.py +108 -0
  692. synth_ai/task/errors.py +50 -0
  693. synth_ai/task/health.py +17 -11
  694. synth_ai/task/inference_api.py +101 -0
  695. synth_ai/task/json.py +111 -0
  696. synth_ai/task/proxy.py +251 -0
  697. synth_ai/task/rubrics/__init__.py +55 -0
  698. synth_ai/task/rubrics/loaders.py +156 -0
  699. synth_ai/task/rubrics/models.py +57 -0
  700. synth_ai/task/rubrics/scoring.py +116 -0
  701. synth_ai/task/rubrics/strict.py +149 -0
  702. synth_ai/task/rubrics.py +219 -0
  703. synth_ai/task/server.py +432 -0
  704. synth_ai/task/trace_correlation_helpers.py +328 -0
  705. synth_ai/task/tracing_utils.py +95 -0
  706. synth_ai/task/validators.py +449 -6
  707. synth_ai/task/vendors.py +59 -0
  708. synth_ai/tracing_v3/__init__.py +4 -0
  709. synth_ai/tracing_v3/abstractions.py +21 -4
  710. synth_ai/tracing_v3/config.py +167 -22
  711. synth_ai/tracing_v3/constants.py +21 -0
  712. synth_ai/tracing_v3/db_config.py +42 -29
  713. synth_ai/tracing_v3/decorators.py +80 -45
  714. synth_ai/tracing_v3/examples/basic_usage.py +15 -9
  715. synth_ai/tracing_v3/hooks.py +6 -4
  716. synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
  717. synth_ai/tracing_v3/migration_helper.py +1 -2
  718. synth_ai/tracing_v3/replica_sync.py +12 -7
  719. synth_ai/tracing_v3/serialization.py +130 -0
  720. synth_ai/tracing_v3/session_tracer.py +86 -21
  721. synth_ai/tracing_v3/storage/base.py +98 -12
  722. synth_ai/tracing_v3/storage/config.py +63 -16
  723. synth_ai/tracing_v3/storage/factory.py +11 -9
  724. synth_ai/tracing_v3/storage/utils.py +15 -11
  725. synth_ai/tracing_v3/trace_utils.py +317 -0
  726. synth_ai/tracing_v3/turso/__init__.py +8 -21
  727. synth_ai/tracing_v3/turso/daemon.py +123 -15
  728. synth_ai/tracing_v3/turso/models.py +5 -2
  729. synth_ai/tracing_v3/turso/native_manager.py +1293 -0
  730. synth_ai/tracing_v3/utils.py +5 -4
  731. synth_ai/tunnel.py +143 -0
  732. synth_ai/tunnel_deploy.py +278 -0
  733. synth_ai/types.py +8 -0
  734. synth_ai/urls.py +11 -0
  735. synth_ai/utils/__init__.py +166 -0
  736. synth_ai/utils/agents.py +74 -0
  737. synth_ai/utils/apps.py +152 -0
  738. synth_ai/utils/base_url.py +94 -0
  739. synth_ai/utils/bin.py +39 -0
  740. synth_ai/utils/claude.py +36 -0
  741. synth_ai/utils/cli.py +284 -0
  742. synth_ai/utils/config.py +81 -0
  743. synth_ai/utils/env.py +346 -0
  744. synth_ai/utils/errors.py +85 -0
  745. synth_ai/utils/http.py +172 -0
  746. synth_ai/utils/json.py +72 -0
  747. synth_ai/utils/log_filter.py +99 -0
  748. synth_ai/utils/logging.py +198 -0
  749. synth_ai/utils/modal.py +299 -0
  750. synth_ai/utils/paths.py +95 -0
  751. synth_ai/utils/process.py +233 -0
  752. synth_ai/utils/prompts.py +39 -0
  753. synth_ai/utils/sqld.py +122 -0
  754. synth_ai/utils/ssl.py +25 -0
  755. synth_ai/utils/task_app_discovery.py +882 -0
  756. synth_ai/utils/task_app_env.py +186 -0
  757. synth_ai/utils/task_app_state.py +318 -0
  758. synth_ai/utils/tunnel/__init__.py +12 -0
  759. synth_ai/utils/tunnel/config.py +55 -0
  760. synth_ai/utils/user_config.py +137 -0
  761. synth_ai/uvicorn.py +77 -0
  762. synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
  763. synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
  764. {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
  765. {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
  766. synth_ai/cli/man.py +0 -106
  767. synth_ai/core/experiment.py +0 -15
  768. synth_ai/core/system.py +0 -15
  769. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  770. synth_ai/experimental/synth_oss.py +0 -446
  771. synth_ai/handshake.py +0 -63
  772. synth_ai/install_sqld.sh +0 -40
  773. synth_ai/learning/offline/dpo.py +0 -0
  774. synth_ai/learning/offline/providers.py +0 -7
  775. synth_ai/learning/offline/sft.py +0 -0
  776. synth_ai/learning/offline/shared.py +0 -0
  777. synth_ai/learning/online/grpo.py +0 -0
  778. synth_ai/learning/online/irft.py +0 -0
  779. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  780. synth_ai/learning/prompts/gepa.py +0 -0
  781. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  782. synth_ai/learning/prompts/mipro.py +0 -289
  783. synth_ai/learning/prompts/random_search.py +0 -246
  784. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  785. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  786. synth_ai/lm/__init__.py +0 -51
  787. synth_ai/lm/caching/constants.py +0 -6
  788. synth_ai/lm/caching/dbs.py +0 -0
  789. synth_ai/lm/caching/ephemeral.py +0 -102
  790. synth_ai/lm/caching/handler.py +0 -137
  791. synth_ai/lm/caching/initialize.py +0 -11
  792. synth_ai/lm/caching/persistent.py +0 -114
  793. synth_ai/lm/config.py +0 -110
  794. synth_ai/lm/constants.py +0 -32
  795. synth_ai/lm/core/__init__.py +0 -8
  796. synth_ai/lm/core/all.py +0 -73
  797. synth_ai/lm/core/exceptions.py +0 -7
  798. synth_ai/lm/core/main.py +0 -319
  799. synth_ai/lm/core/main_v3.py +0 -594
  800. synth_ai/lm/core/synth_models.py +0 -48
  801. synth_ai/lm/core/vendor_clients.py +0 -188
  802. synth_ai/lm/cost/monitor.py +0 -1
  803. synth_ai/lm/cost/statefulness.py +0 -1
  804. synth_ai/lm/injection.py +0 -80
  805. synth_ai/lm/overrides.py +0 -206
  806. synth_ai/lm/provider_support/__init__.py +0 -8
  807. synth_ai/lm/provider_support/anthropic.py +0 -972
  808. synth_ai/lm/provider_support/openai.py +0 -1139
  809. synth_ai/lm/provider_support/suppress_logging.py +0 -31
  810. synth_ai/lm/structured_outputs/handler.py +0 -440
  811. synth_ai/lm/structured_outputs/inject.py +0 -297
  812. synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
  813. synth_ai/lm/tools/__init__.py +0 -3
  814. synth_ai/lm/tools/base.py +0 -172
  815. synth_ai/lm/unified_interface.py +0 -202
  816. synth_ai/lm/vendors/base.py +0 -81
  817. synth_ai/lm/vendors/core/anthropic_api.py +0 -387
  818. synth_ai/lm/vendors/core/gemini_api.py +0 -292
  819. synth_ai/lm/vendors/core/mistral_api.py +0 -322
  820. synth_ai/lm/vendors/core/openai_api.py +0 -225
  821. synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
  822. synth_ai/lm/vendors/local/ollama.py +0 -0
  823. synth_ai/lm/vendors/openai_standard.py +0 -780
  824. synth_ai/lm/vendors/openai_standard_responses.py +0 -256
  825. synth_ai/lm/vendors/retries.py +0 -22
  826. synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
  827. synth_ai/lm/vendors/supported/deepseek.py +0 -69
  828. synth_ai/lm/vendors/supported/grok.py +0 -75
  829. synth_ai/lm/vendors/supported/groq.py +0 -16
  830. synth_ai/lm/vendors/supported/ollama.py +0 -15
  831. synth_ai/lm/vendors/supported/openrouter.py +0 -74
  832. synth_ai/lm/vendors/supported/together.py +0 -11
  833. synth_ai/lm/vendors/synth_client.py +0 -808
  834. synth_ai/lm/warmup.py +0 -186
  835. synth_ai/rl/secrets.py +0 -19
  836. synth_ai/scripts/verify_rewards.py +0 -100
  837. synth_ai/tracing/__init__.py +0 -30
  838. synth_ai/tracing_v1/__init__.py +0 -33
  839. synth_ai/tracing_v3/turso/manager.py +0 -760
  840. synth_ai/v0/tracing/abstractions.py +0 -224
  841. synth_ai/v0/tracing/base_client.py +0 -91
  842. synth_ai/v0/tracing/client_manager.py +0 -131
  843. synth_ai/v0/tracing/config.py +0 -142
  844. synth_ai/v0/tracing/context.py +0 -146
  845. synth_ai/v0/tracing/decorators.py +0 -682
  846. synth_ai/v0/tracing/events/__init__.py +0 -0
  847. synth_ai/v0/tracing/events/manage.py +0 -147
  848. synth_ai/v0/tracing/events/scope.py +0 -86
  849. synth_ai/v0/tracing/events/store.py +0 -228
  850. synth_ai/v0/tracing/immediate_client.py +0 -151
  851. synth_ai/v0/tracing/local.py +0 -18
  852. synth_ai/v0/tracing/log_client_base.py +0 -73
  853. synth_ai/v0/tracing/retry_queue.py +0 -186
  854. synth_ai/v0/tracing/trackers.py +0 -515
  855. synth_ai/v0/tracing/upload.py +0 -512
  856. synth_ai/v0/tracing/utils.py +0 -9
  857. synth_ai/v0/tracing_v1/__init__.py +0 -16
  858. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  859. synth_ai/v0/tracing_v1/base_client.py +0 -91
  860. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  861. synth_ai/v0/tracing_v1/config.py +0 -142
  862. synth_ai/v0/tracing_v1/context.py +0 -146
  863. synth_ai/v0/tracing_v1/decorators.py +0 -703
  864. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  865. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  866. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  867. synth_ai/v0/tracing_v1/events/store.py +0 -228
  868. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  869. synth_ai/v0/tracing_v1/local.py +0 -18
  870. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  871. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  872. synth_ai/v0/tracing_v1/trackers.py +0 -515
  873. synth_ai/v0/tracing_v1/upload.py +0 -527
  874. synth_ai/v0/tracing_v1/utils.py +0 -9
  875. synth_ai/zyk/__init__.py +0 -30
  876. synth_ai-0.2.8.dev4.dist-info/METADATA +0 -129
  877. synth_ai-0.2.8.dev4.dist-info/RECORD +0 -420
  878. {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
  879. {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
  880. {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
  881. {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
  882. {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
  883. {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
  884. {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
  885. {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
  886. /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
  887. /synth_ai/{learning/filtering.py → py.typed} +0 -0
  888. {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
  889. {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1252 @@
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ import logging
5
+ import os
6
+ from datetime import datetime
7
+ import asyncio
8
+ from typing import Any
9
+
10
+ from fastapi import APIRouter, HTTPException, Request
11
+ from pydantic import BaseModel
12
+
13
+ from synth_ai.task.auth import allowed_environment_api_keys, normalize_environment_api_key
14
+ from synth_ai.task.contracts import RolloutMode
15
+
16
+ from .envs.crafter.policy import CrafterPolicy
17
+ from .inference.openai_client import create_inference_client
18
+ from .registry import registry
19
+ from .storage.volume import storage
20
+ from .utils import ensure_chat_completions_url
21
+
22
+ # Token budgeting (shared logic with inference server)
23
+ try:
24
+ from ..core.algorithms.gspo.inference.token_limits import (
25
+ clamp_effective_max_ctx,
26
+ )
27
+ except Exception: # pragma: no cover - defensive import path fallback
28
+ clamp_effective_max_ctx = None # type: ignore
29
+
30
+ try:
31
+ import tiktoken # type: ignore
32
+ except Exception: # pragma: no cover
33
+ tiktoken = None # type: ignore
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+ router = APIRouter()
38
+
39
# Global concurrency limit for outbound inference to avoid backend overload/timeouts.
# INFERENCE_CONCURRENCY is read once at import time; an unset or empty value
# falls back to 2.
try:
    _INFERENCE_CONCURRENCY = int(os.getenv("INFERENCE_CONCURRENCY", "2") or "2")
except ValueError:  # pragma: no cover - non-numeric value in the environment
    _INFERENCE_CONCURRENCY = 2
# Clamp to at least one permit so a zero or negative setting still yields a
# usable semaphore.
_inference_sem = asyncio.Semaphore(max(1, _INFERENCE_CONCURRENCY))
45
+
46
+
47
class PolicyCreateRequest(BaseModel):
    """Request body for the ``/create`` policy endpoint."""

    # Policy selector, matched case-insensitively by create_policy against
    # "crafter", "wordle", "sokoban", "math" and their "-react" variants.
    policy_name: str
    # Free-form policy settings; create_policy copies this dict, applies
    # provider defaults, and requires "inference_url" and "model" to resolve.
    config: dict[str, Any] = {}
    # Not read by create_policy; presumably lineage information (TODO confirm).
    parent_policy_id: str | None = None
    # Forwarded to registry.register_policy.
    rl_run_id: str
    # Forwarded to registry.register_policy.
    bound_env_id: str | None = None
    # Required (no default); passed to ensure_chat_completions_url as `mode`.
    mode: RolloutMode
54
+
55
+
56
class PolicyCreateResponse(BaseModel):
    """Response body for the ``/create`` policy endpoint."""

    # Identifier returned by registry.register_policy for the new policy.
    policy_id: str
58
+
59
+
60
class PolicyStepRequest(BaseModel):
    """Request body for the ``/step`` policy endpoint."""

    # Looked up in the registry by step_policy; an unknown id yields a 404.
    policy_id: str
    # Raw environment observation; step_policy formats it per policy type.
    observation: dict[str, Any]
    # Optional extras; how step_policy consumes the three fields below should
    # be confirmed against its body (TODO confirm).
    state: dict[str, Any] | None = None
    metadata: dict[str, Any] | None = None
    dry_run: bool = False
66
+
67
+
68
class PolicyStepResponse(BaseModel):
    """Response body for the ``/step`` policy endpoint."""

    # List of tool-call dicts; exact schema is not fixed by this model.
    tool_calls: list[dict[str, Any]]
    # Free-form metadata accompanying the step result.
    meta: dict[str, Any]
71
+
72
+
73
class PolicySnapshotRequest(BaseModel):
    # NOTE(review): presumably the request body of a snapshot endpoint; confirm
    # against the route handler that consumes it.
    policy_id: str
75
+
76
+
77
class PolicySnapshotResponse(BaseModel):
    # NOTE(review): presumably describes a stored policy snapshot; the field
    # meanings below are inferred from their names only (TODO confirm).
    snapshot_id: str
    path: str
    rl_run_id: str
    size: int  # units not established by this model (likely bytes; verify)
82
+
83
+
84
class PolicyRestoreRequest(BaseModel):
    # NOTE(review): presumably the request body of a restore endpoint; confirm
    # against the route handler that consumes it.
    snapshot_id: str
86
+
87
+
88
class PolicyRestoreResponse(BaseModel):
    # NOTE(review): presumably carries the id of the policy recreated from a
    # snapshot; confirm against the route handler that returns it.
    policy_id: str
90
+
91
+
92
class PolicyTerminateRequest(BaseModel):
    # NOTE(review): presumably the request body of a terminate endpoint;
    # confirm against the route handler that consumes it.
    policy_id: str
94
+
95
+
96
class PolicyTerminateResponse(BaseModel):
    # NOTE(review): presumably a success flag for policy termination; confirm
    # against the route handler that returns it.
    ok: bool
98
+
99
+
100
@router.post("/create", response_model=PolicyCreateResponse)
async def create_policy(
    request: PolicyCreateRequest,
    req: Request,
) -> PolicyCreateResponse:
    """Create a new policy instance and register it in the in-memory registry.

    The request config is copied and completed with defaults:

    * provider ``groq``: ``inference_url`` defaults to ``<base_url>/proxy/groq``,
      ``model`` to ``qwen/qwen3-32b``, plus ``temperature``/``top_p``/``max_tokens``
      defaults.
    * provider ``openai``: ``inference_url`` defaults to ``<base_url>/proxy``.
    * otherwise ``inference_url`` / ``model`` fall back to the task app's
      ``vllm_base_url`` / ``default_model`` attributes when present.

    The inference URL is then normalized with ``ensure_chat_completions_url``
    and the policy class is chosen from ``request.policy_name``.

    Args:
        request: Parsed policy creation payload.
        req: Incoming FastAPI request; provides ``app.state.task_app`` and the
            base URL used to build proxy endpoints.

    Returns:
        PolicyCreateResponse carrying the id assigned by the registry.

    Raises:
        HTTPException: 422 when ``inference_url``/``model`` cannot be resolved or
            the policy name is unknown; 500 when an optional policy module
            cannot be imported or any other error occurs.
    """
    try:
        task_app = getattr(req.app.state, "task_app", None)

        # Work on a copy so defaults never touch the request model's dict.
        config = dict(request.config or {})
        provider_raw = config.get("provider") or config.get("vendor")
        provider = str(provider_raw).strip().lower() if provider_raw else None

        # Resolve base URL for proxy endpoints (strip trailing slash)
        base_url = str(req.base_url).rstrip("/")

        if provider == "groq":
            # Route through in-app Groq proxy by default
            config.setdefault("inference_url", f"{base_url}/proxy/groq")
            # Default to a recent Groq-hosted Qwen unless caller overrides
            config.setdefault("model", "qwen/qwen3-32b")
            # Groq Qwen defaults tuned for deterministic tool use
            config.setdefault("temperature", 0.0)
            config.setdefault("top_p", 0.95)
            config.setdefault("max_tokens", 256)
            # Store the normalized provider name for the downstream policy
            config["provider"] = "groq"
        elif provider == "openai":
            config.setdefault("inference_url", f"{base_url}/proxy")
            config["provider"] = "openai"

        received_url = config.get("inference_url")
        logger.info(
            "POLICY_CREATE: policy=%s provider=%s raw_inference_url=%s",
            request.policy_name,
            provider,
            received_url,
        )

        # Fall back to task-app level defaults for anything still missing.
        if "inference_url" not in config and task_app is not None:
            task_base_url = getattr(task_app, "vllm_base_url", None)
            if task_base_url:
                config["inference_url"] = task_base_url
        if "model" not in config and task_app is not None:
            default_model = getattr(task_app, "default_model", None)
            if default_model:
                config["model"] = default_model
        if "inference_url" not in config or "model" not in config:
            raise HTTPException(
                status_code=422,
                detail="Policy configuration must include 'inference_url' and 'model'.",
            )

        # `mode` is a required field on PolicyCreateRequest; it drives URL processing.
        mode = request.mode
        logger.info("POLICY_CREATE: Using mode=%s for URL processing", mode)

        sanitized_url = ensure_chat_completions_url(config.get("inference_url"), mode=mode)
        if isinstance(sanitized_url, str) and sanitized_url:
            if sanitized_url != config.get("inference_url"):
                logger.warning(
                    "POLICY_CREATE: normalized inference_url for policy=%s provider=%s mode=%s from %s to %s",
                    request.policy_name,
                    provider,
                    mode,
                    config.get("inference_url"),
                    sanitized_url,
                )
            config["inference_url"] = sanitized_url
        else:
            # Argument order must follow the format string: policy, provider, mode, raw.
            logger.warning(
                "POLICY_CREATE: unable to normalize inference_url for policy=%s provider=%s mode=%s raw=%s",
                request.policy_name,
                provider,
                mode,
                config.get("inference_url"),
            )

        # Create policy instance based on name
        pname = request.policy_name.lower()
        if pname in ["crafter-react", "crafter"]:
            policy = CrafterPolicy(
                inference_url=config["inference_url"],
                model=config["model"],
            )
        elif pname in ["wordle-react", "wordle"]:
            try:
                from .envs.wordle.policy import WordlePolicy
            except Exception as e:
                raise HTTPException(
                    status_code=500, detail=f"Wordle policy unavailable: {e}"
                ) from e

            # word_length / max_guesses are mandatory for Wordle; a missing key
            # surfaces as a 500 through the generic handler below.
            policy = WordlePolicy(
                inference_url=config["inference_url"],
                model=config["model"],
                word_length=int(config["word_length"]),
                max_guesses=int(config["max_guesses"]),
            )
        elif pname in ["sokoban-react", "sokoban"]:
            try:
                from .envs.sokoban.policy import SokobanPolicy
            except Exception as e:
                raise HTTPException(
                    status_code=500, detail=f"Sokoban policy unavailable: {e}"
                ) from e

            policy = SokobanPolicy(
                inference_url=config["inference_url"],
                model=config["model"],
            )
        elif pname in ["math-react", "math"]:
            try:
                from .envs.math.policy import MathPolicy
            except Exception as e:
                raise HTTPException(status_code=500, detail=f"Math policy unavailable: {e}") from e

            policy = MathPolicy(
                inference_url=config["inference_url"],
                model=config["model"],
            )
        else:
            raise HTTPException(
                status_code=422,
                detail=f"Unknown policy name: {request.policy_name}",
            )

        # Every supported policy is initialized with the completed config.
        await policy.initialize(config)

        # Register in memory
        policy_id = registry.register_policy(
            policy=policy,
            rl_run_id=request.rl_run_id,
            bound_env_id=request.bound_env_id,
        )

        return PolicyCreateResponse(policy_id=policy_id)

    except HTTPException:
        # Deliberate HTTP errors (422 validation, 500 import failure) must reach
        # the client unchanged instead of being re-wrapped as a generic 500.
        raise
    except Exception as e:
        logger.exception("Failed to create policy: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
245
+
246
+
247
@router.post("/step", response_model=PolicyStepResponse)
async def step_policy(
    request: PolicyStepRequest,
    req: Request,
) -> PolicyStepResponse:
    """Execute a policy step to generate actions.

    The handler looks up the registered policy, renders the observation to
    text, and asks the policy for its next step. Unless ``request.dry_run`` is
    set (or the policy returned no ``inference_request`` in its metadata), the
    request is then validated, trimmed to a token budget, sent to the resolved
    inference endpoint, and the reply is parsed into tool calls.

    Args:
        request: Step payload. The fields read here are ``policy_id``,
            ``observation``, ``state``, ``metadata`` and ``dry_run``.
        req: Incoming HTTP request; provides ``req.app.state.task_app`` and,
            when present, ``req.state.rollout_tracing``.

    Returns:
        A ``PolicyStepResponse`` holding the tool calls and the ``meta`` dict
        returned by the policy (augmented with inference details).

    Raises:
        HTTPException: 404 when the policy id is not registered; 500 for any
            failure while stepping. An ``HTTPException`` raised inside the
            step itself is propagated unchanged rather than re-wrapped.
    """
    handle = registry.get_policy(request.policy_id)
    if not handle:
        raise HTTPException(status_code=404, detail=f"Policy {request.policy_id} not found")

    try:
        task_app = req.app.state.task_app
        policy = handle.policy
        tracing_context = getattr(req.state, "rollout_tracing", None)
        if tracing_context is None:
            print(
                f"[TRACE_DEBUG] Missing tracing context on policy step; policy_id={request.policy_id}",
                flush=True,
            )

        obs_text = request.observation
        if isinstance(request.observation, dict):
            if isinstance(policy, CrafterPolicy):
                from .envs.crafter.shared import format_observation as format_crafter

                obs_text = format_crafter(request.observation)
            else:
                formatted: str | None = None

                # Wordle formatting (the environment package is optional, so a
                # failed import simply disables this branch)
                try:
                    from .envs.wordle.policy import WordlePolicy
                except Exception:
                    wordle_policy_cls = None  # type: ignore[assignment]
                else:
                    wordle_policy_cls = WordlePolicy

                if formatted is None and wordle_policy_cls is not None and isinstance(
                    policy, wordle_policy_cls
                ):
                    from .envs.wordle.shared import format_observation_wordle

                    # ASSERTION: Validate observation structure
                    assert request.observation is not None, "request.observation cannot be None"
                    assert isinstance(request.observation, dict), (
                        f"request.observation must be dict, got {type(request.observation)}"
                    )

                    required_keys = {
                        "text",
                        "status",
                        "remaining_guesses",
                        "guesses",
                        "feedback",
                        "reward_last",
                        "total_reward",
                        "terminated",
                    }
                    missing_keys = required_keys - set(request.observation.keys())
                    assert (
                        not missing_keys
                    ), f"Wordle observation missing required keys: {missing_keys}"

                    print("DEBUG POLICY_ROUTES: About to format Wordle observation")
                    print(f"DEBUG POLICY_ROUTES: Observation type: {type(request.observation)}")
                    print(
                        f"DEBUG POLICY_ROUTES: Observation keys: {list(request.observation.keys())}"
                    )
                    feedback_val = request.observation["feedback"]
                    print(f"DEBUG POLICY_ROUTES: Observation feedback: {feedback_val}")
                    print(
                        f"DEBUG POLICY_ROUTES: Observation guesses: {request.observation['guesses']}"
                    )
                    print(
                        "DEBUG POLICY_ROUTES: Observation text length: "
                        f"{len(request.observation['text'])}"
                    )

                    guesses = request.observation["guesses"]
                    feedback = request.observation["feedback"]
                    assert isinstance(guesses, list), f"guesses must be list, got {type(guesses)}"
                    assert isinstance(
                        feedback, list
                    ), f"feedback must be list, got {type(feedback)}"

                    formatted = format_observation_wordle(request.observation)

                    assert isinstance(formatted, str), (
                        f"obs_text must be string, got {type(formatted)}"
                    )
                    assert len(formatted) > 0, "obs_text cannot be empty"
                    assert "WORDLE" in formatted, "obs_text must contain 'WORDLE' header"
                    assert "Respond with a single tool call" in formatted, (
                        "obs_text must contain instruction text"
                    )

                    print(
                        f"DEBUG POLICY_ROUTES: Formatted obs_text length: {len(formatted)}"
                    )
                    print(
                        "DEBUG POLICY_ROUTES: Formatted obs_text contains 🟩: "
                        f"{'🟩' in formatted}"
                    )
                    print(
                        "DEBUG POLICY_ROUTES: Formatted obs_text contains 🟨: "
                        f"{'🟨' in formatted}"
                    )
                    print(
                        "DEBUG POLICY_ROUTES: Formatted obs_text contains ⬛: "
                        f"{'⬛' in formatted}"
                    )
                    print(
                        "DEBUG POLICY_ROUTES: Formatted obs_text first 200 chars: "
                        f"{formatted[:200]}"
                    )

                # Sokoban formatting
                try:
                    from .envs.sokoban.policy import SokobanPolicy
                except Exception:
                    sokoban_policy_cls = None  # type: ignore[assignment]
                else:
                    sokoban_policy_cls = SokobanPolicy

                if formatted is None and sokoban_policy_cls is not None and isinstance(
                    policy, sokoban_policy_cls
                ):
                    from .envs.sokoban.shared import format_observation_sokoban

                    formatted = format_observation_sokoban(request.observation)

                # Math formatting
                try:
                    from .envs.math.policy import MathPolicy
                except Exception:
                    math_policy_cls = None  # type: ignore[assignment]
                else:
                    math_policy_cls = MathPolicy

                if formatted is None and math_policy_cls is not None and isinstance(
                    policy, math_policy_cls
                ):
                    try:
                        formatted = str(
                            request.observation.get("problem_text") or request.observation
                        )
                    except Exception:
                        formatted = str(request.observation)

                # No environment-specific formatter matched: fall back to the
                # plain string form of the observation dict.
                if formatted is None:
                    formatted = str(request.observation)

                obs_text = formatted

        # Merge metadata with raw observation for multimodal policies
        step_metadata: dict[str, Any] = dict(request.metadata or {})
        step_metadata["raw_observation"] = request.observation

        # Execute policy step to get inference request
        tool_calls, meta = await policy.step(
            observation_text=obs_text,
            state=request.state,
            metadata=step_metadata,
        )
        # Compact tool call summary (best-effort logging only)
        with contextlib.suppress(Exception):
            _summary: list[dict[str, Any]] = []
            _tc = tool_calls or []
            for _item in (_tc if isinstance(_tc, list) else []):
                if isinstance(_item, dict):
                    _tool = _item.get("tool")
                    _args = _item.get("args")
                    _keys = list(_args.keys()) if isinstance(_args, dict) else []
                    _summary.append({"tool": _tool, "args_keys": _keys})
            logger.info(
                "POLICY_STEP: tool_calls=%d summary=%s",
                len(_tc),
                _summary,
            )

        # If not dry run, perform inference
        if not request.dry_run and "inference_request" in meta:
            # CRITICAL: Validate that the inference request contains the correct prompts for the policy
            inf_req = meta["inference_request"]
            msgs = inf_req["messages"]
            model_name = inf_req.get("model") or getattr(policy, "model", None) or ""
            # Bound unconditionally: it is read further down (URL normalisation
            # logging, headers, tool-call checks) even when the request has no
            # leading system message.
            policy_name = getattr(policy, "name", "") or type(policy).__name__.lower()
            if msgs and len(msgs) > 0 and msgs[0]["role"] == "system":
                sys_text = msgs[0]["content"]

                # Assert environment-specific prompts match the policy
                if policy_name in ("wordle-react", "wordle"):
                    if "Wordle" not in sys_text:
                        raise ValueError(
                            f"PROMPT MISMATCH: Wordle policy {policy_name} received system prompt without 'Wordle' keyword: {sys_text[:200]}..."
                        )
                    if "Crafter" in sys_text:
                        raise ValueError(
                            f"PROMPT MISMATCH: Wordle policy {policy_name} received Crafter system prompt: {sys_text[:200]}..."
                        )

                elif policy_name in ("crafter-react", "crafter") or isinstance(
                    policy, CrafterPolicy
                ):
                    if "Crafter" not in sys_text:
                        raise ValueError(
                            f"PROMPT MISMATCH: Crafter policy {policy_name} received system prompt without 'Crafter' keyword: {sys_text[:200]}..."
                        )
                    if "Wordle" in sys_text:
                        raise ValueError(
                            f"PROMPT MISMATCH: Crafter policy {policy_name} received Wordle system prompt: {sys_text[:200]}..."
                        )
                elif policy_name in ("sokoban-react", "sokoban"):
                    if "Sokoban" not in sys_text:
                        raise ValueError(
                            f"PROMPT MISMATCH: Sokoban policy {policy_name} received system prompt without 'Sokoban' keyword: {sys_text[:200]}..."
                        )
                    if "Crafter" in sys_text or "Wordle" in sys_text:
                        raise ValueError(
                            f"PROMPT MISMATCH: Sokoban policy {policy_name} received wrong environment system prompt: {sys_text[:200]}..."
                        )

                logger.info(
                    f"✅ PROMPT VALIDATION: {policy_name} policy has correct system prompt containing expected environment keywords"
                )
            else:
                logger.warning(
                    f"⚠️ PROMPT VALIDATION: No system message found in inference request for policy {getattr(policy, 'name', type(policy).__name__)}"
                )

            # Collect system/user prompts for observability. Full text is only
            # logged when CRAFT_LOG_PROMPTS is enabled; otherwise just counts.
            system_prompt_records: list[dict[str, Any]] = []
            user_prompt_records: list[dict[str, Any]] = []
            try:

                def _as_text(content: object) -> str:
                    """Flatten a message ``content`` value to plain text."""
                    if isinstance(content, str):
                        return content
                    if isinstance(content, list):
                        # Concatenate any dict segments that resemble OpenAI content parts
                        parts: list[str] = []
                        for seg in content:
                            try:
                                if isinstance(seg, dict):
                                    txt = seg.get("text") or seg.get("content") or ""
                                    if isinstance(txt, str):
                                        parts.append(txt)
                            except Exception:
                                continue
                        return "".join(parts)
                    return str(content)

                for message in msgs:
                    role = message.get("role")
                    raw_content = message.get("content")
                    content = _as_text(raw_content)
                    record = {"role": role, "text": content, "content": raw_content}
                    if role == "system":
                        system_prompt_records.append(record)
                    elif role == "user":
                        user_prompt_records.append(record)

                last_user_chars = (
                    len(user_prompt_records[-1].get("text", "")) if user_prompt_records else 0
                )
                logger.info(
                    "PROMPTS: system_msgs=%d user_msgs=%d last_user_chars=%d (content suppressed)",
                    len(system_prompt_records),
                    len(user_prompt_records),
                    last_user_chars,
                )

                log_prompt_details = (
                    os.getenv("CRAFT_LOG_PROMPTS", "").strip().lower()
                    in {"1", "true", "yes", "debug"}
                )
                if log_prompt_details:
                    if system_prompt_records:
                        logger.info("PROMPT_DETAILS_SYSTEM_BEGIN")
                        for idx, rec in enumerate(system_prompt_records):
                            smsg = rec.get("text", "")
                            logger.info("SYSTEM[%d]: %s", idx, smsg)
                        logger.info("PROMPT_DETAILS_SYSTEM_END")
                    if user_prompt_records:
                        logger.info("PROMPT_DETAILS_USER_BEGIN")
                        for idx, rec in enumerate(user_prompt_records):
                            umsg = rec.get("text", "")
                            logger.info("USER[%d]: %s", idx, umsg)
                        logger.info("PROMPT_DETAILS_USER_END")
            except Exception as e:
                logger.warning(f"PROMPT_DUMP_FAILED: {e}")

            if tracing_context is not None:
                try:
                    logger.info(
                        "[TRACE_DEBUG] record_policy_prompts sys=%s user=%s",
                        len(system_prompt_records),
                        len(user_prompt_records),
                    )
                    await tracing_context.record_policy_prompts(
                        system_prompt_records, user_prompt_records
                    )
                except Exception as exc:
                    logger.debug(f"TRACING_PROMPTS_FAIL: {exc}")

            # Create inference client (choose API key by target provider)
            # Resolution order: step metadata, then the policy, then the task app.
            target_url = (
                meta.get("inference_url")
                or getattr(policy, "inference_url", None)
                or getattr(task_app, "vllm_base_url", None)
            )

            # Ensure meta carries the final target URL for downstream logging/clients
            with contextlib.suppress(Exception):
                # Bulletproof normalizer at the call site (in addition to client-side)
                try:
                    from examples.task_apps.crafter.task_app.synth_envs_hosted.utils import (
                        force_normalize_chat_completions_url,
                    )
                    target_url = force_normalize_chat_completions_url(target_url)
                except Exception:
                    pass
                sanitized_target = ensure_chat_completions_url(target_url)
                if sanitized_target and sanitized_target != target_url:
                    logger.warning(
                        "POLICY_STEP: normalized inference_url mid-flight policy=%s from %s to %s",
                        policy_name,
                        target_url,
                        sanitized_target,
                    )
                elif not sanitized_target:
                    # NOTE(review): this message fires when the sanitizer returned
                    # nothing, not when the URL was left as-is -- confirm intent.
                    logger.info(
                        "POLICY_STEP: inference_url unchanged policy=%s target=%s",
                        policy_name,
                        target_url,
                    )
                meta["inference_url"] = sanitized_target if sanitized_target else target_url
                target_url = sanitized_target or target_url

            # Select API key based on resolved target URL
            api_key_override = None
            try:
                import os as _os

                if isinstance(target_url, str):
                    low_url = target_url.lower()
                    # Proxy endpoints should not receive a bearer; the server-side proxy holds the vendor key
                    if "/proxy/groq" in low_url or "/proxy/openai" in low_url:
                        api_key_override = None
                    elif "openai.com" in low_url:
                        api_key_override = _os.getenv("OPENAI_API_KEY") or getattr(
                            task_app, "openai_api_key", None
                        )
                    elif "groq.com" in low_url:
                        # "/proxy/groq" is already handled by the first branch.
                        api_key_override = _os.getenv("GROQ_API_KEY")
                    else:
                        api_key_override = (
                            _os.getenv("SYNTH_API_KEY")
                            or _os.getenv("OPENAI_API_KEY")
                            or getattr(task_app, "openai_api_key", None)
                        )
                else:
                    api_key_override = (
                        _os.getenv("SYNTH_API_KEY")
                        or _os.getenv("OPENAI_API_KEY")
                        or getattr(task_app, "openai_api_key", None)
                    )
            except Exception:
                api_key_override = None

            # Fallback: If target is OpenAI but OPENAI_API_KEY is missing, route to Synth API
            try:
                import os as _os2
                _low = str(target_url or "").lower()
                if ("api.openai.com" in _low) and not (_os2.getenv("OPENAI_API_KEY")):
                    # Prefer task_app.synth_base_url if available; else default
                    synth_base = getattr(task_app, "synth_base_url", None)
                    if isinstance(synth_base, str) and synth_base.strip():
                        base = synth_base.rstrip("/")
                        fallback = base + "/inference/v1/chat/completions"
                    else:
                        fallback = "https://api.synth.run/api/inference/v1/chat/completions"
                    fixed = ensure_chat_completions_url(fallback)
                    logger.warning(
                        "POLICY_STEP: OPENAI key missing; falling back to Synth route %s",
                        fixed,
                    )
                    meta["inference_url"] = fixed
                    target_url = fixed
            except Exception:
                pass

            if api_key_override:
                # Only a masked prefix/suffix of the key is ever logged.
                try:
                    masked = f"{api_key_override[:6]}…{api_key_override[-4:]}"
                except Exception:
                    masked = "<masked>"
                logger.debug(f"INFERENCE_AUTH: Using bearer key {masked}")
            else:
                logger.debug(
                    "INFERENCE_AUTH: No bearer key resolved for inference request (expected when using in-app proxy)"
                )

            client = create_inference_client(task_app, api_key=api_key_override)

            # Add policy identification header and task auth for proxy fallback
            extra_headers = {"X-Policy-Name": policy_name}
            try:
                env_key = normalize_environment_api_key()
                if not env_key:
                    allowed_keys = allowed_environment_api_keys()
                    if allowed_keys:
                        env_key = next(iter(sorted(allowed_keys)))
                if isinstance(env_key, str) and env_key:
                    extra_headers["X-API-Key"] = env_key
                else:
                    logger.warning(
                        "INFERENCE_AUTH: Failed to resolve ENVIRONMENT_API_KEY for proxy request headers"
                    )
            except Exception as exc:
                logger.warning(f"INFERENCE_AUTH: Error resolving ENVIRONMENT_API_KEY: {exc}")

            # Apply input truncation to avoid 422 from inference server
            try:
                model_name = inf_req.get("model") or getattr(policy, "model", None) or ""
                env_max_ctx = None
                try:
                    _env_max = int(os.getenv("CHAT_MAX_MODEL_LEN", "0") or 0)
                    env_max_ctx = _env_max if _env_max > 0 else None
                except Exception:
                    env_max_ctx = None
                # Compute effective max context and safety margin
                eff_ctx = None
                if clamp_effective_max_ctx is not None:
                    eff_ctx = clamp_effective_max_ctx(
                        model_name=model_name,
                        configured_max_model_len=None,
                        env_max_model_len=env_max_ctx,
                    )
                # Hard lower-only chat input cap if provided
                try:
                    hard_input_cap = int(os.getenv("CHAT_MAX_INPUT_TOKENS", "0") or 0)
                    hard_input_cap = hard_input_cap if hard_input_cap > 0 else None
                except Exception:
                    hard_input_cap = None
                try:
                    safety_margin = int(os.getenv("CHAT_BUDGET_SAFETY", "64").strip() or 64)
                except Exception:
                    safety_margin = 64

                # Determine budget: context minus margin (floored at 256),
                # further lowered by the hard input cap when one is set.
                budget = None
                if isinstance(eff_ctx, int) and eff_ctx > 0:
                    budget = max(256, eff_ctx - safety_margin)
                if isinstance(hard_input_cap, int) and hard_input_cap > 0:
                    budget = min(budget, hard_input_cap) if budget is not None else hard_input_cap

                if budget is not None and budget > 0 and isinstance(msgs, list):
                    # Choose tokenizer
                    enc = None
                    if tiktoken is not None:
                        try:
                            if model_name:
                                enc = tiktoken.encoding_for_model(model_name)
                            else:
                                enc = tiktoken.get_encoding("cl100k_base")
                        except Exception:
                            try:
                                enc = tiktoken.get_encoding("cl100k_base")
                            except Exception:
                                enc = None

                    def _content_to_text(content: object) -> str:
                        """Flatten a message ``content`` value to plain text."""
                        if isinstance(content, str):
                            return content
                        if isinstance(content, list):
                            parts: list[str] = []
                            for seg in content:
                                try:
                                    if isinstance(seg, dict):
                                        txt = seg.get("text") or seg.get("content") or ""
                                        if isinstance(txt, str):
                                            parts.append(txt)
                                except Exception:
                                    continue
                            return "".join(parts)
                        try:
                            return str(content)
                        except Exception:
                            return ""

                    def _count_tokens(text: str) -> int:
                        """Count tokens with ``enc``, or estimate from length."""
                        if enc is None:
                            # Fall back to character count heuristic (~4 chars per token)
                            try:
                                return max(1, int(len(text) / 4))
                            except Exception:
                                return len(text)
                        try:
                            return len(enc.encode(text))
                        except Exception:
                            return max(1, int(len(text) / 4))

                    def _count_messages_tokens(messages: list[dict[str, Any]]) -> int:
                        """Sum the token counts of every message's content."""
                        total = 0
                        for m in messages:
                            total += _count_tokens(_content_to_text(m.get("content")))
                        return total

                    def _truncate_messages_to_budget(
                        messages: list[dict[str, Any]],
                        max_tokens: int,
                    ) -> tuple[list[dict[str, Any]], int, int, int]:
                        """Trim ``messages`` to ``max_tokens``, newest first.

                        Returns ``(kept, tokens_before, tokens_after, kept_count)``.
                        The input list itself is returned when it already fits.
                        """
                        before = _count_messages_tokens(messages)
                        if before <= max_tokens:
                            return messages, before, before, len(messages)
                        # Always try to preserve the first system message if present
                        system_msg = None
                        start_idx = 0
                        if messages and messages[0].get("role") == "system":
                            system_msg = messages[0]
                            start_idx = 1
                        kept_rev: list[dict[str, Any]] = []
                        total = _count_messages_tokens([system_msg] if system_msg else [])
                        # Walk from the end keeping most recent messages
                        for m in reversed(messages[start_idx:]):
                            t = _count_tokens(_content_to_text(m.get("content")))
                            if total + t <= max_tokens:
                                kept_rev.append(m)
                                total += t
                            else:
                                # Try to keep a truncated version of this message if we have some budget left
                                remaining = max_tokens - total
                                if remaining > 16:  # keep at least a little context
                                    txt = _content_to_text(m.get("content"))
                                    # Binary search-ish trim by tokens: find the
                                    # longest suffix of the text that still fits.
                                    low, high = 0, len(txt)
                                    best = None
                                    while low <= high:
                                        mid = (low + high) // 2
                                        candidate = txt[-mid:]
                                        if _count_tokens(candidate) <= remaining:
                                            best = candidate
                                            low = mid + 1
                                        else:
                                            high = mid - 1
                                    if best is not None and best:
                                        m2 = dict(m)
                                        m2["content"] = best
                                        kept_rev.append(m2)
                                        total += _count_tokens(best)
                                # Stop at the first message that does not fit whole.
                                break
                        kept = list(reversed(kept_rev))
                        if system_msg is not None:
                            kept = [system_msg] + kept
                        after = _count_messages_tokens(kept)
                        return kept, before, after, len(kept)

                    new_msgs, before_toks, after_toks, kept_count = _truncate_messages_to_budget(
                        msgs, int(budget)
                    )
                    # Identity check: a new list means something was dropped or trimmed.
                    if new_msgs is not msgs:
                        inf_req["messages"] = new_msgs
                        with contextlib.suppress(Exception):
                            logger.info(
                                {
                                    "chat_truncated": True,
                                    "token_budget": int(budget),
                                    "before_tokens": int(before_toks),
                                    "after_tokens": int(after_toks),
                                    "kept_msgs": int(kept_count),
                                }
                            )
            except Exception as _trunc_e:
                logger.warning(f"CHAT_TRUNCATION_FAILED: {type(_trunc_e).__name__}: {_trunc_e}")

            # Formal assertion: If tools are expected, ensure tool_choice and tools are set
            if policy_name in (
                "wordle-react",
                "sokoban-react",
                "crafter-react",
            ) and getattr(policy, "use_tools", True):
                inf_req = meta.get("inference_request", {})
                req_tools = inf_req.get("tools")
                req_tool_choice = inf_req.get("tool_choice")
                req_stop_after = inf_req.get("stop_after_tool_calls")
                logger.info(
                    f"TOOLCALL_CONFIG: policy={policy_name} tools_present={bool(req_tools)} tool_choice={req_tool_choice} stop_after={req_stop_after}"
                )
                if not req_tools or req_tool_choice != "required":
                    raise HTTPException(
                        status_code=500,
                        detail=f"TOOLCALL_ASSERTION_FAIL: Missing tools or tool_choice!=required for policy {policy_name}",
                    )
                if req_stop_after is None:
                    inf_req["stop_after_tool_calls"] = 1

            # Call inference service with retries for Flash cold-start (503)
            import time as _t

            # Prompt diagnostics before sending to inference: build chat template locally,
            # count tokens, and log the first 10k tokens if oversized. Also stash a
            # compact preview in meta so the trainer can surface it.
            # Entirely best-effort: any failure (e.g. transformers missing) is ignored.
            with contextlib.suppress(Exception):
                req_for_diag = meta.get("inference_request", {})
                model_for_diag = req_for_diag.get("model") or getattr(policy, "model", None) or ""
                messages_for_diag = req_for_diag.get("messages") or []
                if model_for_diag and messages_for_diag:
                    from transformers import AutoTokenizer

                    tok = AutoTokenizer.from_pretrained(model_for_diag)
                    prompt_preview = tok.apply_chat_template(
                        messages_for_diag,
                        add_generation_prompt=True,
                        tokenize=False,
                    )
                    ids = tok.encode(prompt_preview, add_special_tokens=False)
                    max_len = getattr(tok, "model_max_length", None)
                    over_limit = False
                    with contextlib.suppress(Exception):
                        over_limit = (
                            isinstance(max_len, int) and max_len > 0 and len(ids) > int(max_len)
                        )
                    if over_limit or len(ids) > 10000:
                        preview_ids = ids[:10000]
                        preview_text = tok.decode(
                            preview_ids,
                            skip_special_tokens=False,
                        )
                        with contextlib.suppress(Exception):
                            logger.warning(
                                {
                                    "prompt_token_overflow_local": True,
                                    "model": str(model_for_diag),
                                    "token_count": int(len(ids)),
                                    "model_max_length": int(max_len)
                                    if isinstance(max_len, int)
                                    else None,
                                    "preview_tokens_logged": int(len(preview_ids)),
                                    "prompt_preview_first_10k_tokens": preview_text,
                                }
                            )
                        with contextlib.suppress(Exception):
                            meta["prompt_debug"] = {
                                "token_count": int(len(ids)),
                                "model_max_length": int(max_len)
                                if isinstance(max_len, int)
                                else None,
                                "preview_first_10k_tokens": preview_text,
                            }

            # Emit the exact prompt/messages and tools before calling the LLM (bounded preview)
            # Do not print prompts; only log response content later

            # Normalize request for non-OpenAI endpoints (strict schemas)
            with contextlib.suppress(Exception):
                base = str(target_url or "")
                is_openai_dotcom = "openai.com" in base.lower()
                if not is_openai_dotcom:
                    req_body = meta.get("inference_request", {})
                    if isinstance(req_body, dict):
                        # Force structured tool_choice if a bare "required" is present
                        if req_body.get("tool_choice") == "required":
                            func_name = "interact_many"
                            with contextlib.suppress(Exception):
                                tools_arr = req_body.get("tools") or []
                                if isinstance(tools_arr, list) and tools_arr:
                                    f = (
                                        tools_arr[0].get("function")
                                        if isinstance(tools_arr[0], dict)
                                        else None
                                    )
                                    cand = (f or {}).get("name") if isinstance(f, dict) else None
                                    if isinstance(cand, str) and cand:
                                        func_name = cand
                            req_body["tool_choice"] = {
                                "type": "function",
                                "function": {"name": func_name},
                            }
                            req_body["parallel_tool_calls"] = False
                            req_body.setdefault("function_call", {"name": func_name})
                        # Inject extra_body for thinking controls expected by Modal service
                        with contextlib.suppress(Exception):
                            tb = req_body.get("thinking_budget")
                            tm = str(req_body.get("thinking_mode") or "").lower()
                            enable_thinking = bool(tb) or tm == "think"
                            extra = dict(req_body.get("extra_body") or {})
                            chat_kwargs = dict(extra.get("chat_template_kwargs") or {})
                            if enable_thinking:
                                chat_kwargs["enable_thinking"] = True
                            if isinstance(tb, int | float | str) and str(tb).strip():
                                with contextlib.suppress(Exception):
                                    chat_kwargs["thinking_budget"] = int(tb)
                            if chat_kwargs:
                                extra["chat_template_kwargs"] = chat_kwargs
                            # Ensure stop_after_tool_calls honored via extra_body for stricter servers
                            extra.setdefault("stop_after_tool_calls", 1)
                            if extra:
                                req_body["extra_body"] = extra
                        # Provide a conservative default temperature if missing
                        if "temperature" not in req_body:
                            req_body["temperature"] = 0.1
                        meta["inference_request"] = req_body

            # Message flattening: Convert multimodal content to text-only for non-vision models.
            # SKIP message flattening for vision models to preserve image_url parts!
            # The old code here was flattening multimodal content (list) to text-only (str),
            # which strips out image_url parts. This breaks vision models.
            # Only flatten for non-vision models that can't handle multimodal format.
            is_vision_model = False
            try:
                # Check if the policy is a vision-capable policy
                if isinstance(policy, CrafterPolicy):
                    is_vision_model = getattr(policy, "use_vision", False)
            except Exception:
                pass

            logger.debug(f"🔊 [POLICY_ROUTES] is_vision_model={is_vision_model}, will_flatten={not is_vision_model}")

            if not is_vision_model:
                # Only flatten for non-vision models (backward compatibility)
                req_body2 = meta.get("inference_request", {})
                if isinstance(req_body2, dict):
                    msgs = req_body2.get("messages")
                    if isinstance(msgs, list):
                        new_msgs = []
                        changed = False
                        for m in msgs:
                            try:
                                if isinstance(m, dict):
                                    content = m.get("content")
                                    if isinstance(content, list):
                                        parts: list[str] = []
                                        for seg in content:
                                            if isinstance(seg, dict):
                                                txt = seg.get("text") or seg.get("content")
                                                if isinstance(txt, str) and txt:
                                                    parts.append(txt)
                                        m2 = dict(m)
                                        m2["content"] = "\n".join(parts)
                                        new_msgs.append(m2)
                                        changed = True
                                    else:
                                        new_msgs.append(m)
                                else:
                                    new_msgs.append(m)
                            except Exception:
                                new_msgs.append(m)
                        if changed:
                            req_body2["messages"] = new_msgs
                            meta["inference_request"] = req_body2
                            logger.debug(f"🔊 [POLICY_ROUTES] Flattened messages for non-vision model")
            else:
                logger.debug(f"🔊 [POLICY_ROUTES] Preserving multimodal content for vision model")

            # DEBUG: Log final message structure before calling inference
            final_req = meta.get("inference_request", {})
            if isinstance(final_req, dict):
                final_msgs = final_req.get("messages", [])
                logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Sending {len(final_msgs)} messages to inference")
                for idx, msg in enumerate(final_msgs):
                    if isinstance(msg, dict):
                        content = msg.get("content")
                        logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Message[{idx}]: type={type(content).__name__}, is_list={isinstance(content, list)}")
                        if isinstance(content, list):
                            logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Content list has {len(content)} items")
                            for part_idx, part in enumerate(content[:3]):  # Show first 3 items
                                if isinstance(part, dict):
                                    logger.debug(f"🔊 [POLICY_ROUTES_FINAL] Part[{part_idx}]: type={part.get('type')}")

            # The timer starts before the semaphore is acquired, so inference_ms
            # includes any time spent waiting for a free inference slot.
            _t_start = _t.time()
            call_started_at = datetime.utcnow()
            async with _inference_sem:
                inference_response = await client.generate_with_retries(
                    request=meta["inference_request"],
                    base_url=meta["inference_url"],
                    max_retries=12,
                    backoff_factor=2.0,
                    extra_headers=extra_headers,
                )
            meta["inference_ms"] = int((_t.time() - _t_start) * 1000)
            call_completed_at = datetime.utcnow()

            # Provider label for tracing, derived from the final URL.
            provider_url = str(meta.get("inference_url") or "")
            low_url = provider_url.lower()
            if "groq" in low_url:
                provider_name = "groq"
            elif "openai" in low_url:
                provider_name = "openai"
            else:
                provider_name = "custom"

            # Parse response to tool calls
            tool_calls = policy.parse_response_to_tool_calls(
                response=inference_response,
                use_tools=getattr(policy, "use_tools", True),
            )

            # Debug logging (echo tool calls)
            if not tool_calls:
                # Structured error log with small preview; avoid dumping full response repeatedly
                preview = str(inference_response)[:400]
                logger.error(
                    f"TOOLCALL_PARSE_FAIL: policy={policy_name} parsed=0 preview={preview}"
                )
            else:
                try:
                    import json as _json

                    print(
                        {
                            "tool_calls_parsed": int(len(tool_calls)),
                            "tool_calls_preview": _json.dumps(tool_calls)[:20000],
                        }
                    )
                except Exception:
                    logger.info(f"Parsed {len(tool_calls)} tool calls: {tool_calls}")

            # Add response to metadata
            # Parse tool calls from model response using policy-specific parser
            # NOTE(review): this repeats the parse performed just above; kept
            # because any side effects of the parser are not visible here.
            try:
                if hasattr(policy, "parse_response_to_tool_calls"):
                    parsed = policy.parse_response_to_tool_calls(
                        inference_response, getattr(policy, "use_tools", True)
                    )
                else:
                    parsed = policy.parse_model_response(inference_response, request.observation)
                # Replace tool_calls with parsed result
                if isinstance(parsed, list):
                    tool_calls = parsed
                with contextlib.suppress(Exception):
                    logger.info(
                        "TOOLCALL_PARSE: parsed=%d has_tools=%s example=%r",
                        len(tool_calls) if isinstance(tool_calls, list) else -1,
                        bool(getattr(policy, "use_tools", True)),
                        (tool_calls[0] if isinstance(tool_calls, list) and tool_calls else None),
                    )
            except Exception as _pe:
                logger.warning(f"Failed to parse tool calls: {str(_pe)}")
            # Attach raw response + usage for observability
            meta["raw_response"] = inference_response
            if "usage" in inference_response:
                meta["usage"] = inference_response["usage"]

            if tracing_context is not None:
                try:
                    await tracing_context.record_llm_call(
                        inference_request=meta["inference_request"],
                        inference_response=inference_response,
                        tool_calls=tool_calls,
                        provider=provider_name,
                        model_name=model_name,
                        started_at=call_started_at,
                        completed_at=call_completed_at,
                        latency_ms=meta.get("inference_ms"),
                    )
                except Exception as exc:
                    logger.debug(f"TRACING_LLM_FAIL: {exc}")

        # A step without any tool call is treated as a hard failure.
        if not tool_calls:
            preview = ""
            try:
                preview = str(meta.get("raw_response") or "")[:400]
            except Exception:
                preview = "<unavailable>"
            logger.error(
                {
                    "rollout.policy_step": True,
                    "policy_id": request.policy_id,
                    "error": "no_tool_calls",
                    "inference_url": meta.get("inference_url"),
                    "raw_preview": preview,
                }
            )
            raise RuntimeError("Policy step produced no tool calls; inference response unusable.")

        return PolicyStepResponse(
            tool_calls=tool_calls,
            meta=meta,
        )

    except HTTPException as http_exc:
        # Already a well-formed HTTP error (e.g. the tool-call assertion):
        # log it and let it through instead of re-wrapping it.
        logger.error(f"Failed to step policy {request.policy_id}: {http_exc.detail}")
        raise
    except Exception as e:
        logger.error(f"Failed to step policy {request.policy_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
1133
+
1134
+
1135
@router.post("/snapshot", response_model=PolicySnapshotResponse)
async def snapshot_policy(request: PolicySnapshotRequest) -> PolicySnapshotResponse:
    """Create a snapshot of the policy state.

    Serializes the registered policy, writes the result through ``storage``,
    records the snapshot in ``registry`` and reports where it was stored.

    Raises:
        HTTPException: 404 when ``request.policy_id`` is not registered;
            500 when serializing, saving or registering fails.
    """
    policy_handle = registry.get_policy(request.policy_id)
    if not policy_handle:
        raise HTTPException(status_code=404, detail=f"Policy {request.policy_id} not found")

    try:
        # Capture the policy's state as a plain dict.
        serialized_state = await policy_handle.policy.serialize()

        # Persist it to the snapshot volume.
        saved = storage.save_snapshot(
            rl_run_id=policy_handle.rl_run_id,
            kind="policy",
            state_dict=serialized_state,
        )
        snapshot_id, path, size = saved

        # Make the snapshot discoverable through the registry.
        registry.register_snapshot(
            kind="policy",
            rl_run_id=policy_handle.rl_run_id,
            size=size,
            path=path,
        )

        response = PolicySnapshotResponse(
            snapshot_id=snapshot_id,
            path=path,
            rl_run_id=policy_handle.rl_run_id,
            size=size,
        )
        return response

    except Exception as e:
        logger.error(f"Failed to snapshot policy {request.policy_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
1171
+
1172
+
1173
@router.post("/restore", response_model=PolicyRestoreResponse)
async def restore_policy(request: PolicyRestoreRequest) -> PolicyRestoreResponse:
    """Restore a policy from a snapshot.

    Loads the stored state dict, rebuilds the matching policy class from the
    ``name`` recorded in it, and registers the new instance.

    Returns:
        A ``PolicyRestoreResponse`` carrying the id of the newly registered policy.

    Raises:
        HTTPException: 404 when the snapshot id is unknown; 422 when the
            snapshot is not a policy snapshot or names an unknown policy;
            500 when the policy class cannot be imported or any other step
            of the restore fails.
    """
    snapshot = registry.get_snapshot(request.snapshot_id)
    if not snapshot:
        raise HTTPException(status_code=404, detail=f"Snapshot {request.snapshot_id} not found")

    if snapshot.kind != "policy":
        raise HTTPException(
            status_code=422,
            detail=f"Snapshot {request.snapshot_id} is not a policy snapshot",
        )

    try:
        # Load snapshot from volume (the accompanying metadata is not needed here)
        state_dict, _meta = storage.load_snapshot(
            rl_run_id=snapshot.rl_run_id,
            kind="policy",
            snapshot_id=request.snapshot_id,
        )

        # Recreate policy
        policy_name = state_dict["name"]
        low = policy_name.lower()
        if low in ["crafter-react", "crafter"]:
            policy = await CrafterPolicy.deserialize(state_dict)
        elif low in ["wordle-react", "wordle"]:
            # Imported lazily: the Wordle environment may not be installed.
            try:
                from .envs.wordle.policy import WordlePolicy
            except Exception as e:
                raise HTTPException(
                    status_code=500, detail=f"Wordle policy unavailable: {e}"
                ) from e
            policy = await WordlePolicy.deserialize(state_dict)
        elif low in ["sokoban-react", "sokoban"]:
            # Imported lazily: the Sokoban environment may not be installed.
            try:
                from .envs.sokoban.policy import SokobanPolicy
            except Exception as e:
                raise HTTPException(
                    status_code=500, detail=f"Sokoban policy unavailable: {e}"
                ) from e
            policy = await SokobanPolicy.deserialize(state_dict)
        else:
            raise HTTPException(
                status_code=422,
                detail=f"Unknown policy name in snapshot: {policy_name}",
            )

        # Register new instance
        policy_id = registry.register_policy(
            policy=policy,
            rl_run_id=snapshot.rl_run_id,
        )

        return PolicyRestoreResponse(policy_id=policy_id)

    except HTTPException as http_exc:
        # Keep the intended status code (e.g. 422 for an unknown policy name)
        # instead of letting the generic handler below turn it into a 500.
        logger.error(
            f"Failed to restore policy from snapshot {request.snapshot_id}: {http_exc.detail}"
        )
        raise
    except Exception as e:
        logger.error(f"Failed to restore policy from snapshot {request.snapshot_id}: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
1232
+
1233
+
1234
@router.post("/terminate", response_model=PolicyTerminateResponse)
async def terminate_policy(request: PolicyTerminateRequest) -> PolicyTerminateResponse:
    """Shut down a registered policy and drop it from the registry.

    Raises:
        HTTPException: 404 when the policy id is not registered; 500 when
            terminating the policy or removing it from the registry fails.
    """
    policy_id = request.policy_id
    handle = registry.get_policy(policy_id)
    if not handle:
        raise HTTPException(status_code=404, detail=f"Policy {policy_id} not found")

    try:
        # Let the policy release its own resources before it is unregistered.
        await handle.policy.terminate()
        registry.remove_policy(policy_id)
        return PolicyTerminateResponse(ok=True)
    except Exception as exc:
        logger.error(f"Failed to terminate policy {policy_id}: {exc}")
        raise HTTPException(status_code=500, detail=str(exc)) from exc