synth-ai 0.2.8.dev4__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (889) hide show
  1. examples/README.md +1 -0
  2. examples/__init__.py +16 -0
  3. examples/analyze_semantic_words.sh +17 -0
  4. examples/baseline/banking77_baseline.py +243 -0
  5. examples/baseline/banking77_pipeline_baseline.py +294 -0
  6. examples/baseline/crafter_baseline.py +407 -0
  7. examples/baseline/pokemon_red_baseline.py +326 -0
  8. examples/baseline/simple_baseline.py +56 -0
  9. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  10. examples/blog_posts/gepa/README.md +355 -0
  11. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  12. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
  13. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
  14. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
  15. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
  16. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
  17. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
  18. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
  19. examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
  20. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
  21. examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
  22. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
  23. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
  24. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
  25. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
  26. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
  27. examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
  28. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  29. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  30. examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
  31. examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
  32. examples/blog_posts/gepa/task_apps.py +105 -0
  33. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  34. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  35. examples/blog_posts/mipro/README.md +415 -0
  36. examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
  37. examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
  38. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
  39. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
  40. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
  41. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
  42. examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
  43. examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
  44. examples/blog_posts/mipro/multi_step.md +79 -0
  45. examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
  46. examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
  47. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
  48. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
  49. examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
  50. examples/blog_posts/pokemon_vl/README.md +98 -0
  51. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  52. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  53. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  54. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  55. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  56. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  57. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  58. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  59. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  60. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  61. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  62. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  63. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  64. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  65. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  66. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  67. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  68. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  69. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  70. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  71. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  72. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  73. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  74. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  75. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  76. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  77. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  78. examples/crafter_debug_render.py +186 -0
  79. examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
  80. examples/gepa/banking77_pipeline_gepa.toml +96 -0
  81. examples/gepa/multi_stage_gepa_example.toml +84 -0
  82. examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
  83. examples/multi_step/SFT_README.md +147 -0
  84. examples/multi_step/configs/README_verilog_rl.md +77 -0
  85. examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
  86. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
  87. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  88. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  89. examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
  90. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
  91. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
  92. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
  93. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  94. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  95. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  96. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  97. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  98. examples/multi_step/configs/verilog_rl_lora.toml +147 -0
  99. examples/multi_step/convert_traces_to_sft.py +84 -0
  100. examples/multi_step/crafter_rl_lora.md +70 -0
  101. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  102. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  103. examples/multi_step/readme.md +48 -0
  104. examples/multi_step/run_sft_qwen30b.sh +45 -0
  105. examples/multi_step/sse_metrics_streaming_notes.md +357 -0
  106. examples/multi_step/task_app_config_notes.md +494 -0
  107. examples/multi_step/verilog_rl_lora.md +218 -0
  108. examples/qwen_coder/README.md +102 -0
  109. examples/qwen_coder/_shared.py +113 -0
  110. examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
  111. examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
  112. examples/qwen_coder/configs/coder_lora_small.toml +57 -0
  113. examples/qwen_coder/generate_dataset.py +98 -0
  114. examples/qwen_coder/infer_ft_smoke.py +65 -0
  115. examples/qwen_coder/infer_prod_proxy.py +73 -0
  116. examples/qwen_coder/infer_via_synth.py +87 -0
  117. examples/qwen_coder/scripts/infer_coder.sh +19 -0
  118. examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
  119. examples/qwen_coder/sft_full_17b.py +103 -0
  120. examples/qwen_coder/sft_lora_30b.py +110 -0
  121. examples/qwen_coder/subset_jsonl.py +39 -0
  122. examples/qwen_coder/todos.md +38 -0
  123. examples/qwen_coder/validate_jsonl.py +60 -0
  124. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  125. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  126. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  127. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  128. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  129. examples/qwen_vl/QUICKSTART.md +327 -0
  130. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  131. examples/qwen_vl/README.md +152 -0
  132. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  133. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  134. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  135. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  136. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  137. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  138. examples/qwen_vl/__init__.py +2 -0
  139. examples/qwen_vl/collect_data_via_cli.md +415 -0
  140. examples/qwen_vl/collect_vision_traces.py +368 -0
  141. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  142. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  143. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  144. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  145. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  146. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  147. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  148. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  149. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  150. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  151. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  152. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  153. examples/qwen_vl/run_vision_comparison.sh +61 -0
  154. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  155. examples/qwen_vl/test_image_validation.py +201 -0
  156. examples/qwen_vl/test_sft_vision_data.py +110 -0
  157. examples/rl/README.md +169 -0
  158. examples/rl/configs/eval_base_qwen.toml +17 -0
  159. examples/rl/configs/eval_rl_qwen.toml +13 -0
  160. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  161. examples/rl/configs/rl_from_base_qwen17.toml +80 -0
  162. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  163. examples/rl/download_dataset.py +80 -0
  164. examples/rl/run_eval.py +436 -0
  165. examples/rl/run_rl_and_save.py +111 -0
  166. examples/rl/task_app/README.md +21 -0
  167. examples/rl/task_app/math_single_step.py +990 -0
  168. examples/rl/task_app/math_task_app.py +111 -0
  169. examples/run_crafter_demo.sh +10 -0
  170. examples/sdk_prompt_learning_example.py +55 -0
  171. examples/sft/README.md +139 -0
  172. examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
  173. examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
  174. examples/sft/evaluate.py +117 -0
  175. examples/sft/export_dataset.py +120 -0
  176. examples/sft/generate_traces.py +164 -0
  177. examples/swe/__init__.py +12 -0
  178. examples/swe/task_app/README.md +135 -0
  179. examples/swe/task_app/__init__.py +2 -0
  180. examples/swe/task_app/grpo_swe_mini.py +604 -0
  181. examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
  182. examples/swe/task_app/hosted/README.md +173 -0
  183. examples/swe/task_app/hosted/__init__.py +5 -0
  184. examples/swe/task_app/hosted/branching.py +143 -0
  185. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  186. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  187. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  188. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  189. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  190. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  191. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  192. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  193. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  194. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  195. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
  196. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  197. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  198. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  199. examples/swe/task_app/hosted/hosted_app.py +204 -0
  200. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  201. examples/swe/task_app/hosted/inference/openai_client.py +584 -0
  202. examples/swe/task_app/hosted/main.py +100 -0
  203. examples/swe/task_app/hosted/policy_routes.py +1094 -0
  204. examples/swe/task_app/hosted/registry.py +195 -0
  205. examples/swe/task_app/hosted/rollout.py +1905 -0
  206. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  207. examples/swe/task_app/hosted/storage/volume.py +211 -0
  208. examples/swe/task_app/hosted/test_agents.py +161 -0
  209. examples/swe/task_app/hosted/test_service.py +136 -0
  210. examples/swe/task_app/hosted/utils.py +62 -0
  211. examples/swe/task_app/morph_backend.py +178 -0
  212. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  213. examples/task_apps/TESTING.md +275 -0
  214. examples/task_apps/banking77/__init__.py +6 -0
  215. examples/task_apps/banking77/banking77_task_app.py +912 -0
  216. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  217. examples/task_apps/banking77_pipeline/__init__.py +6 -0
  218. examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
  219. examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
  220. examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
  221. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  222. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
  223. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
  224. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  225. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  226. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  227. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  228. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  229. examples/task_apps/crafter/task_app/README.md +42 -0
  230. examples/task_apps/crafter/task_app/__init__.py +5 -0
  231. examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
  232. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
  233. examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
  234. examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
  235. examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
  236. examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  237. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  238. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  239. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  240. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
  241. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
  242. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
  243. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  244. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  245. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
  246. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  247. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
  248. examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
  249. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
  250. examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
  251. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
  252. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  253. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
  254. examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
  255. examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
  256. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
  257. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  258. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  259. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  260. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  261. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  262. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  263. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  264. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  265. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  266. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  267. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  268. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  269. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  270. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  271. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  272. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  273. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  274. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  275. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  276. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  277. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  278. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  279. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  280. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  281. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  282. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  283. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  284. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  285. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  286. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  287. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  288. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  289. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  290. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  291. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  292. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  293. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  294. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  295. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  296. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  297. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  298. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  299. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  300. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  301. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  302. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  303. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  304. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  305. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  306. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  307. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  308. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  309. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  310. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  311. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  312. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  313. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  314. examples/task_apps/enron/__init__.py +2 -0
  315. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  316. examples/task_apps/enron/filter_sft.toml +5 -0
  317. examples/task_apps/enron/task_app/README.md +14 -0
  318. examples/task_apps/enron/task_app/__init__.py +1 -0
  319. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  320. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  321. examples/task_apps/enron/tests/__init__.py +4 -0
  322. examples/task_apps/enron/tests/conftest.py +115 -0
  323. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  324. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  325. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  326. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  327. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  328. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  329. examples/task_apps/gepa_benchmarks/common.py +260 -0
  330. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  331. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  332. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  333. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  334. examples/task_apps/math/README.md +21 -0
  335. examples/task_apps/math/math_single_step.py +1000 -0
  336. examples/task_apps/math/math_task_app.py +115 -0
  337. examples/task_apps/pokemon_battle/__init__.py +2 -0
  338. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  339. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  340. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  341. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  342. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  343. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  344. examples/task_apps/pokemon_red/README.md +356 -0
  345. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
  346. examples/task_apps/pokemon_red/__init__.py +3 -0
  347. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
  348. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
  349. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  350. examples/task_apps/pokemon_red/task_app.py +1048 -0
  351. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  352. examples/task_apps/sokoban/README.md +306 -0
  353. examples/task_apps/sokoban/__init__.py +3 -0
  354. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  355. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  356. examples/task_apps/sokoban/filter_sft.toml +5 -0
  357. examples/task_apps/sokoban/task_app.py +1058 -0
  358. examples/task_apps/sokoban/tests/__init__.py +4 -0
  359. examples/task_apps/sokoban/tests/conftest.py +113 -0
  360. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  361. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  362. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  363. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  364. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  365. examples/task_apps/verilog/__init__.py +1 -0
  366. examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
  367. examples/task_apps/verilog/filter_sft.toml +5 -0
  368. examples/task_apps/verilog/task_app/README.md +12 -0
  369. examples/task_apps/verilog/task_app/__init__.py +1 -0
  370. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  371. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  372. examples/task_apps/verilog/tests/__init__.py +4 -0
  373. examples/task_apps/verilog/tests/conftest.py +115 -0
  374. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  375. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  376. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  377. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  378. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  379. examples/tunnel_gepa_banking77/README.md +106 -0
  380. examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
  381. examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
  382. examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
  383. examples/vlm/PROPOSAL.md +53 -0
  384. examples/vlm/README.md +68 -0
  385. examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
  386. examples/vlm/crafter_image_only_agent.py +207 -0
  387. examples/vlm/crafter_openai_vlm_agent.py +275 -0
  388. examples/vlm/filter_image_rows.py +63 -0
  389. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  390. examples/warming_up_to_rl/_utils.py +92 -0
  391. examples/warming_up_to_rl/analyze_trace_db.py +422 -0
  392. examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
  393. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
  394. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
  395. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
  396. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
  397. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
  398. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
  399. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
  400. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
  401. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
  402. examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
  403. examples/warming_up_to_rl/export_trace_sft.py +837 -0
  404. examples/warming_up_to_rl/groq_test.py +97 -0
  405. examples/warming_up_to_rl/manage_secrets.py +131 -0
  406. examples/warming_up_to_rl/old/event_rewards.md +234 -0
  407. examples/warming_up_to_rl/old/notes.md +73 -0
  408. examples/warming_up_to_rl/readme.md +110 -0
  409. examples/warming_up_to_rl/run_eval.py +736 -0
  410. examples/warming_up_to_rl/run_fft_and_save.py +380 -0
  411. examples/warming_up_to_rl/run_local_rollout.py +239 -0
  412. examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
  413. examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
  414. examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
  415. examples/warming_up_to_rl/run_rl_and_save.py +124 -0
  416. examples/warming_up_to_rl/run_rollout_remote.py +156 -0
  417. examples/warming_up_to_rl/task_app/README.md +42 -0
  418. examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
  419. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  420. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  421. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  422. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  423. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  424. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  425. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  426. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  427. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  428. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  429. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  430. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  431. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  432. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
  433. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  434. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
  435. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  436. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
  437. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  438. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
  439. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  440. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  441. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  442. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  443. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
  444. examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
  445. examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
  446. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
  447. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
  448. examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
  449. examples/workflows/math_rl/download_dataset.py +80 -0
  450. examples/workflows/math_rl/run_eval.py +436 -0
  451. examples/workflows/math_rl/run_rl_and_save.py +111 -0
  452. synth_ai/__init__.py +47 -23
  453. synth_ai/_utils/__init__.py +47 -0
  454. synth_ai/_utils/base_url.py +10 -0
  455. synth_ai/_utils/http.py +10 -0
  456. synth_ai/_utils/prompts.py +10 -0
  457. synth_ai/_utils/task_app_state.py +12 -0
  458. synth_ai/_utils/user_config.py +10 -0
  459. synth_ai/api/models/supported.py +514 -0
  460. synth_ai/api/train/__init__.py +63 -0
  461. synth_ai/api/train/builders.py +473 -0
  462. synth_ai/api/train/cli.py +1185 -0
  463. synth_ai/api/train/config_finder.py +246 -0
  464. synth_ai/api/train/configs/__init__.py +65 -0
  465. synth_ai/api/train/configs/prompt_learning.py +496 -0
  466. synth_ai/api/train/configs/rl.py +188 -0
  467. synth_ai/api/train/configs/sft.py +99 -0
  468. synth_ai/api/train/configs/shared.py +81 -0
  469. synth_ai/api/train/env_resolver.py +352 -0
  470. synth_ai/api/train/pollers.py +91 -0
  471. synth_ai/api/train/prompt_learning.py +425 -0
  472. synth_ai/api/train/sft.py +390 -0
  473. synth_ai/api/train/supported_algos.py +147 -0
  474. synth_ai/api/train/task_app.py +195 -0
  475. synth_ai/api/train/utils.py +244 -0
  476. synth_ai/api/train/validators.py +1117 -0
  477. synth_ai/api/tunnel.py +49 -0
  478. synth_ai/auth/credentials.py +94 -0
  479. synth_ai/baseline/__init__.py +25 -0
  480. synth_ai/baseline/config.py +209 -0
  481. synth_ai/baseline/discovery.py +214 -0
  482. synth_ai/baseline/execution.py +146 -0
  483. synth_ai/cfgs.py +227 -0
  484. synth_ai/cli/__init__.py +90 -45
  485. synth_ai/cli/_modal_wrapper.py +31 -0
  486. synth_ai/cli/_storage.py +20 -0
  487. synth_ai/cli/_typer_patch.py +47 -0
  488. synth_ai/cli/_validate_task_app.py +29 -0
  489. synth_ai/cli/balance.py +16 -4
  490. synth_ai/cli/calc.py +36 -21
  491. synth_ai/cli/claude.py +70 -0
  492. synth_ai/cli/codex.py +267 -0
  493. synth_ai/cli/commands/__init__.py +18 -0
  494. synth_ai/cli/commands/baseline/__init__.py +12 -0
  495. synth_ai/cli/commands/baseline/core.py +637 -0
  496. synth_ai/cli/commands/baseline/list.py +93 -0
  497. synth_ai/cli/commands/demo/__init__.py +6 -0
  498. synth_ai/cli/commands/demo/core.py +163 -0
  499. synth_ai/cli/commands/eval/__init__.py +19 -0
  500. synth_ai/cli/commands/eval/core.py +1112 -0
  501. synth_ai/cli/commands/eval/errors.py +81 -0
  502. synth_ai/cli/commands/eval/validation.py +133 -0
  503. synth_ai/cli/commands/filter/__init__.py +12 -0
  504. synth_ai/cli/commands/filter/core.py +424 -0
  505. synth_ai/cli/commands/filter/errors.py +55 -0
  506. synth_ai/cli/commands/filter/validation.py +77 -0
  507. synth_ai/cli/commands/help/__init__.py +185 -0
  508. synth_ai/cli/commands/help/core.py +72 -0
  509. synth_ai/cli/commands/smoke/__init__.py +7 -0
  510. synth_ai/cli/commands/smoke/core.py +1437 -0
  511. synth_ai/cli/commands/status/__init__.py +66 -0
  512. synth_ai/cli/commands/status/client.py +192 -0
  513. synth_ai/cli/commands/status/config.py +92 -0
  514. synth_ai/cli/commands/status/errors.py +20 -0
  515. synth_ai/cli/commands/status/formatters.py +164 -0
  516. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  517. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  518. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  519. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  520. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  521. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  522. synth_ai/cli/commands/status/subcommands/session.py +183 -0
  523. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  524. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  525. synth_ai/cli/commands/status/utils.py +114 -0
  526. synth_ai/cli/commands/train/__init__.py +53 -0
  527. synth_ai/cli/commands/train/core.py +21 -0
  528. synth_ai/cli/commands/train/errors.py +117 -0
  529. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  530. synth_ai/cli/commands/train/judge_validation.py +305 -0
  531. synth_ai/cli/commands/train/validation.py +386 -0
  532. synth_ai/cli/demo.py +32 -140
  533. synth_ai/cli/deploy.py +233 -0
  534. synth_ai/cli/eval/__init__.py +36 -0
  535. synth_ai/cli/eval/core.py +5 -0
  536. synth_ai/cli/eval/errors.py +31 -0
  537. synth_ai/cli/eval/validation.py +5 -0
  538. synth_ai/cli/filter/__init__.py +28 -0
  539. synth_ai/cli/filter/core.py +5 -0
  540. synth_ai/cli/filter/errors.py +23 -0
  541. synth_ai/cli/filter/validation.py +5 -0
  542. synth_ai/cli/legacy_root_backup.py +28 -22
  543. synth_ai/cli/lib/__init__.py +10 -0
  544. synth_ai/cli/lib/task_app_discovery.py +7 -0
  545. synth_ai/cli/lib/task_app_env.py +518 -0
  546. synth_ai/cli/mcp.py +34 -0
  547. synth_ai/cli/modal_serve/__init__.py +12 -0
  548. synth_ai/cli/modal_serve/core.py +14 -0
  549. synth_ai/cli/modal_serve/errors.py +8 -0
  550. synth_ai/cli/modal_serve/validation.py +11 -0
  551. synth_ai/cli/opencode.py +256 -0
  552. synth_ai/cli/recent.py +13 -7
  553. synth_ai/cli/rl_demo.py +166 -114
  554. synth_ai/cli/root.py +143 -112
  555. synth_ai/cli/serve/__init__.py +12 -0
  556. synth_ai/cli/serve/core.py +14 -0
  557. synth_ai/cli/serve/errors.py +8 -0
  558. synth_ai/cli/serve/validation.py +11 -0
  559. synth_ai/cli/setup.py +49 -0
  560. synth_ai/cli/status.py +7 -125
  561. synth_ai/cli/task_app_deploy.py +7 -0
  562. synth_ai/cli/task_app_list.py +25 -0
  563. synth_ai/cli/task_app_modal_serve.py +11 -0
  564. synth_ai/cli/task_app_serve.py +11 -0
  565. synth_ai/cli/task_apps.py +3134 -0
  566. synth_ai/cli/traces.py +9 -5
  567. synth_ai/cli/train/__init__.py +12 -0
  568. synth_ai/cli/train/core.py +21 -0
  569. synth_ai/cli/train/errors.py +8 -0
  570. synth_ai/cli/train/validation.py +24 -0
  571. synth_ai/cli/train.py +5 -0
  572. synth_ai/cli/turso.py +73 -0
  573. synth_ai/cli/watch.py +13 -18
  574. synth_ai/demos/__init__.py +10 -0
  575. synth_ai/demos/core/__init__.py +28 -1
  576. synth_ai/demos/core/cli.py +745 -416
  577. synth_ai/demos/crafter/__init__.py +1 -0
  578. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  579. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  580. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  581. synth_ai/demos/demo_registry.py +176 -0
  582. synth_ai/demos/demo_task_apps/__init__.py +7 -1
  583. synth_ai/demos/demo_task_apps/core.py +75 -37
  584. synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
  585. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
  586. synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
  587. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
  588. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  589. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  590. synth_ai/demos/demo_task_apps/math/config.toml +55 -110
  591. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  592. synth_ai/demos/demo_task_apps/math/modal_task_app.py +491 -166
  593. synth_ai/demos/demo_task_apps/math/task_app_entry.py +37 -0
  594. synth_ai/demos/math/__init__.py +1 -0
  595. synth_ai/demos/math/_common.py +16 -0
  596. synth_ai/demos/math/app.py +38 -0
  597. synth_ai/demos/math/config.toml +76 -0
  598. synth_ai/demos/math/deploy_modal.py +54 -0
  599. synth_ai/demos/math/modal_task_app.py +703 -0
  600. synth_ai/demos/math/task_app_entry.py +51 -0
  601. synth_ai/environments/environment/core.py +7 -1
  602. synth_ai/environments/examples/bandit/engine.py +12 -5
  603. synth_ai/environments/examples/bandit/environment.py +0 -1
  604. synth_ai/environments/examples/bandit/taskset.py +4 -4
  605. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  606. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  607. synth_ai/environments/examples/crafter_classic/environment.py +93 -2
  608. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  609. synth_ai/environments/examples/enron/engine.py +7 -2
  610. synth_ai/environments/examples/enron/environment.py +68 -0
  611. synth_ai/environments/examples/red/engine.py +60 -12
  612. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  613. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  614. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  615. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  616. synth_ai/environments/examples/red/environment.py +86 -0
  617. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  618. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  619. synth_ai/environments/examples/verilog/engine.py +104 -12
  620. synth_ai/environments/examples/wordle/environment.py +0 -1
  621. synth_ai/environments/reproducibility/tree.py +5 -6
  622. synth_ai/environments/service/app.py +11 -12
  623. synth_ai/environments/service/core_routes.py +10 -9
  624. synth_ai/environments/stateful/engine.py +1 -1
  625. synth_ai/environments/tasks/core.py +1 -0
  626. synth_ai/environments/tasks/filters.py +5 -6
  627. synth_ai/environments/tasks/utils.py +4 -5
  628. synth_ai/evals/__init__.py +15 -0
  629. synth_ai/evals/base.py +14 -5
  630. synth_ai/evals/client.py +82 -0
  631. synth_ai/evals/types.py +42 -0
  632. synth_ai/http.py +8 -22
  633. synth_ai/http_client.py +45 -12
  634. synth_ai/inference/__init__.py +0 -2
  635. synth_ai/inference/client.py +21 -7
  636. synth_ai/jobs/client.py +129 -80
  637. synth_ai/judge_schemas.py +127 -0
  638. synth_ai/learning/__init__.py +51 -6
  639. synth_ai/learning/algorithms.py +14 -0
  640. synth_ai/learning/client.py +122 -30
  641. synth_ai/learning/config.py +2 -40
  642. synth_ai/learning/constants.py +0 -2
  643. synth_ai/learning/ft_client.py +4 -56
  644. synth_ai/learning/health.py +14 -8
  645. synth_ai/learning/jobs.py +43 -47
  646. synth_ai/learning/prompt_learning_client.py +276 -0
  647. synth_ai/learning/prompt_learning_types.py +185 -0
  648. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  649. synth_ai/learning/rl/client.py +269 -0
  650. synth_ai/learning/rl/config.py +31 -0
  651. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  652. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  653. synth_ai/learning/rl/secrets.py +13 -0
  654. synth_ai/learning/rl_client.py +2 -253
  655. synth_ai/learning/sft/__init__.py +29 -0
  656. synth_ai/learning/sft/client.py +68 -0
  657. synth_ai/learning/sft/config.py +270 -0
  658. synth_ai/learning/sft/data.py +698 -0
  659. synth_ai/learning/sse.py +25 -26
  660. synth_ai/learning/validators.py +29 -25
  661. synth_ai/mcp/__init__.py +5 -0
  662. synth_ai/mcp/__main__.py +8 -0
  663. synth_ai/mcp/main.py +254 -0
  664. synth_ai/mcp/setup.py +100 -0
  665. synth_ai/modal.py +257 -0
  666. synth_ai/pricing/__init__.py +3 -0
  667. synth_ai/pricing/model_pricing.py +64 -0
  668. synth_ai/session/__init__.py +75 -0
  669. synth_ai/session/client.py +383 -0
  670. synth_ai/session/constants.py +63 -0
  671. synth_ai/session/exceptions.py +105 -0
  672. synth_ai/session/manager.py +139 -0
  673. synth_ai/session/models.py +89 -0
  674. synth_ai/session/query.py +110 -0
  675. synth_ai/spec/__init__.py +46 -0
  676. synth_ai/spec/dataclasses.py +149 -0
  677. synth_ai/spec/loader.py +144 -0
  678. synth_ai/spec/serializer.py +199 -0
  679. synth_ai/spec/validation.py +250 -0
  680. synth_ai/streaming/__init__.py +29 -0
  681. synth_ai/streaming/config.py +94 -0
  682. synth_ai/streaming/handlers.py +589 -0
  683. synth_ai/streaming/streamer.py +320 -0
  684. synth_ai/streaming/types.py +95 -0
  685. synth_ai/task/__init__.py +116 -3
  686. synth_ai/task/apps/__init__.py +132 -0
  687. synth_ai/task/auth.py +165 -0
  688. synth_ai/task/client.py +167 -0
  689. synth_ai/task/config.py +261 -0
  690. synth_ai/task/contracts.py +173 -57
  691. synth_ai/task/datasets.py +108 -0
  692. synth_ai/task/errors.py +50 -0
  693. synth_ai/task/health.py +17 -11
  694. synth_ai/task/inference_api.py +101 -0
  695. synth_ai/task/json.py +111 -0
  696. synth_ai/task/proxy.py +251 -0
  697. synth_ai/task/rubrics/__init__.py +55 -0
  698. synth_ai/task/rubrics/loaders.py +156 -0
  699. synth_ai/task/rubrics/models.py +57 -0
  700. synth_ai/task/rubrics/scoring.py +116 -0
  701. synth_ai/task/rubrics/strict.py +149 -0
  702. synth_ai/task/rubrics.py +219 -0
  703. synth_ai/task/server.py +432 -0
  704. synth_ai/task/trace_correlation_helpers.py +328 -0
  705. synth_ai/task/tracing_utils.py +95 -0
  706. synth_ai/task/validators.py +449 -6
  707. synth_ai/task/vendors.py +59 -0
  708. synth_ai/tracing_v3/__init__.py +4 -0
  709. synth_ai/tracing_v3/abstractions.py +21 -4
  710. synth_ai/tracing_v3/config.py +167 -22
  711. synth_ai/tracing_v3/constants.py +21 -0
  712. synth_ai/tracing_v3/db_config.py +42 -29
  713. synth_ai/tracing_v3/decorators.py +80 -45
  714. synth_ai/tracing_v3/examples/basic_usage.py +15 -9
  715. synth_ai/tracing_v3/hooks.py +6 -4
  716. synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
  717. synth_ai/tracing_v3/migration_helper.py +1 -2
  718. synth_ai/tracing_v3/replica_sync.py +12 -7
  719. synth_ai/tracing_v3/serialization.py +130 -0
  720. synth_ai/tracing_v3/session_tracer.py +86 -21
  721. synth_ai/tracing_v3/storage/base.py +98 -12
  722. synth_ai/tracing_v3/storage/config.py +63 -16
  723. synth_ai/tracing_v3/storage/factory.py +11 -9
  724. synth_ai/tracing_v3/storage/utils.py +15 -11
  725. synth_ai/tracing_v3/trace_utils.py +317 -0
  726. synth_ai/tracing_v3/turso/__init__.py +8 -21
  727. synth_ai/tracing_v3/turso/daemon.py +123 -15
  728. synth_ai/tracing_v3/turso/models.py +5 -2
  729. synth_ai/tracing_v3/turso/native_manager.py +1293 -0
  730. synth_ai/tracing_v3/utils.py +5 -4
  731. synth_ai/tunnel.py +143 -0
  732. synth_ai/tunnel_deploy.py +278 -0
  733. synth_ai/types.py +8 -0
  734. synth_ai/urls.py +11 -0
  735. synth_ai/utils/__init__.py +166 -0
  736. synth_ai/utils/agents.py +74 -0
  737. synth_ai/utils/apps.py +152 -0
  738. synth_ai/utils/base_url.py +94 -0
  739. synth_ai/utils/bin.py +39 -0
  740. synth_ai/utils/claude.py +36 -0
  741. synth_ai/utils/cli.py +284 -0
  742. synth_ai/utils/config.py +81 -0
  743. synth_ai/utils/env.py +346 -0
  744. synth_ai/utils/errors.py +85 -0
  745. synth_ai/utils/http.py +172 -0
  746. synth_ai/utils/json.py +72 -0
  747. synth_ai/utils/log_filter.py +99 -0
  748. synth_ai/utils/logging.py +198 -0
  749. synth_ai/utils/modal.py +299 -0
  750. synth_ai/utils/paths.py +95 -0
  751. synth_ai/utils/process.py +233 -0
  752. synth_ai/utils/prompts.py +39 -0
  753. synth_ai/utils/sqld.py +122 -0
  754. synth_ai/utils/ssl.py +25 -0
  755. synth_ai/utils/task_app_discovery.py +882 -0
  756. synth_ai/utils/task_app_env.py +186 -0
  757. synth_ai/utils/task_app_state.py +318 -0
  758. synth_ai/utils/tunnel/__init__.py +12 -0
  759. synth_ai/utils/tunnel/config.py +55 -0
  760. synth_ai/utils/user_config.py +137 -0
  761. synth_ai/uvicorn.py +77 -0
  762. synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
  763. synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
  764. {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
  765. {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
  766. synth_ai/cli/man.py +0 -106
  767. synth_ai/core/experiment.py +0 -15
  768. synth_ai/core/system.py +0 -15
  769. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  770. synth_ai/experimental/synth_oss.py +0 -446
  771. synth_ai/handshake.py +0 -63
  772. synth_ai/install_sqld.sh +0 -40
  773. synth_ai/learning/offline/dpo.py +0 -0
  774. synth_ai/learning/offline/providers.py +0 -7
  775. synth_ai/learning/offline/sft.py +0 -0
  776. synth_ai/learning/offline/shared.py +0 -0
  777. synth_ai/learning/online/grpo.py +0 -0
  778. synth_ai/learning/online/irft.py +0 -0
  779. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  780. synth_ai/learning/prompts/gepa.py +0 -0
  781. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  782. synth_ai/learning/prompts/mipro.py +0 -289
  783. synth_ai/learning/prompts/random_search.py +0 -246
  784. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  785. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  786. synth_ai/lm/__init__.py +0 -51
  787. synth_ai/lm/caching/constants.py +0 -6
  788. synth_ai/lm/caching/dbs.py +0 -0
  789. synth_ai/lm/caching/ephemeral.py +0 -102
  790. synth_ai/lm/caching/handler.py +0 -137
  791. synth_ai/lm/caching/initialize.py +0 -11
  792. synth_ai/lm/caching/persistent.py +0 -114
  793. synth_ai/lm/config.py +0 -110
  794. synth_ai/lm/constants.py +0 -32
  795. synth_ai/lm/core/__init__.py +0 -8
  796. synth_ai/lm/core/all.py +0 -73
  797. synth_ai/lm/core/exceptions.py +0 -7
  798. synth_ai/lm/core/main.py +0 -319
  799. synth_ai/lm/core/main_v3.py +0 -594
  800. synth_ai/lm/core/synth_models.py +0 -48
  801. synth_ai/lm/core/vendor_clients.py +0 -188
  802. synth_ai/lm/cost/monitor.py +0 -1
  803. synth_ai/lm/cost/statefulness.py +0 -1
  804. synth_ai/lm/injection.py +0 -80
  805. synth_ai/lm/overrides.py +0 -206
  806. synth_ai/lm/provider_support/__init__.py +0 -8
  807. synth_ai/lm/provider_support/anthropic.py +0 -972
  808. synth_ai/lm/provider_support/openai.py +0 -1139
  809. synth_ai/lm/provider_support/suppress_logging.py +0 -31
  810. synth_ai/lm/structured_outputs/handler.py +0 -440
  811. synth_ai/lm/structured_outputs/inject.py +0 -297
  812. synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
  813. synth_ai/lm/tools/__init__.py +0 -3
  814. synth_ai/lm/tools/base.py +0 -172
  815. synth_ai/lm/unified_interface.py +0 -202
  816. synth_ai/lm/vendors/base.py +0 -81
  817. synth_ai/lm/vendors/core/anthropic_api.py +0 -387
  818. synth_ai/lm/vendors/core/gemini_api.py +0 -292
  819. synth_ai/lm/vendors/core/mistral_api.py +0 -322
  820. synth_ai/lm/vendors/core/openai_api.py +0 -225
  821. synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
  822. synth_ai/lm/vendors/local/ollama.py +0 -0
  823. synth_ai/lm/vendors/openai_standard.py +0 -780
  824. synth_ai/lm/vendors/openai_standard_responses.py +0 -256
  825. synth_ai/lm/vendors/retries.py +0 -22
  826. synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
  827. synth_ai/lm/vendors/supported/deepseek.py +0 -69
  828. synth_ai/lm/vendors/supported/grok.py +0 -75
  829. synth_ai/lm/vendors/supported/groq.py +0 -16
  830. synth_ai/lm/vendors/supported/ollama.py +0 -15
  831. synth_ai/lm/vendors/supported/openrouter.py +0 -74
  832. synth_ai/lm/vendors/supported/together.py +0 -11
  833. synth_ai/lm/vendors/synth_client.py +0 -808
  834. synth_ai/lm/warmup.py +0 -186
  835. synth_ai/rl/secrets.py +0 -19
  836. synth_ai/scripts/verify_rewards.py +0 -100
  837. synth_ai/tracing/__init__.py +0 -30
  838. synth_ai/tracing_v1/__init__.py +0 -33
  839. synth_ai/tracing_v3/turso/manager.py +0 -760
  840. synth_ai/v0/tracing/abstractions.py +0 -224
  841. synth_ai/v0/tracing/base_client.py +0 -91
  842. synth_ai/v0/tracing/client_manager.py +0 -131
  843. synth_ai/v0/tracing/config.py +0 -142
  844. synth_ai/v0/tracing/context.py +0 -146
  845. synth_ai/v0/tracing/decorators.py +0 -682
  846. synth_ai/v0/tracing/events/__init__.py +0 -0
  847. synth_ai/v0/tracing/events/manage.py +0 -147
  848. synth_ai/v0/tracing/events/scope.py +0 -86
  849. synth_ai/v0/tracing/events/store.py +0 -228
  850. synth_ai/v0/tracing/immediate_client.py +0 -151
  851. synth_ai/v0/tracing/local.py +0 -18
  852. synth_ai/v0/tracing/log_client_base.py +0 -73
  853. synth_ai/v0/tracing/retry_queue.py +0 -186
  854. synth_ai/v0/tracing/trackers.py +0 -515
  855. synth_ai/v0/tracing/upload.py +0 -512
  856. synth_ai/v0/tracing/utils.py +0 -9
  857. synth_ai/v0/tracing_v1/__init__.py +0 -16
  858. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  859. synth_ai/v0/tracing_v1/base_client.py +0 -91
  860. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  861. synth_ai/v0/tracing_v1/config.py +0 -142
  862. synth_ai/v0/tracing_v1/context.py +0 -146
  863. synth_ai/v0/tracing_v1/decorators.py +0 -703
  864. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  865. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  866. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  867. synth_ai/v0/tracing_v1/events/store.py +0 -228
  868. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  869. synth_ai/v0/tracing_v1/local.py +0 -18
  870. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  871. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  872. synth_ai/v0/tracing_v1/trackers.py +0 -515
  873. synth_ai/v0/tracing_v1/upload.py +0 -527
  874. synth_ai/v0/tracing_v1/utils.py +0 -9
  875. synth_ai/zyk/__init__.py +0 -30
  876. synth_ai-0.2.8.dev4.dist-info/METADATA +0 -129
  877. synth_ai-0.2.8.dev4.dist-info/RECORD +0 -420
  878. {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
  879. {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
  880. {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
  881. {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
  882. {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
  883. {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
  884. {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
  885. {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
  886. /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
  887. /synth_ai/{learning/filtering.py → py.typed} +0 -0
  888. {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
  889. {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
@@ -1,12 +1,20 @@
1
- from __future__ import annotations
2
-
3
1
  """Modal task app for Hendrycks MATH single-step RL environment."""
4
2
 
3
+ from __future__ import annotations
4
+
5
5
  import os
6
+ from collections.abc import Iterable
7
+ from functools import lru_cache
6
8
  from pathlib import Path
7
9
 
10
+ from starlette.requests import Request
11
+
8
12
  from modal import App, Image, Secret, asgi_app
9
- from functools import lru_cache
13
+
14
+ try: # Backward compatibility with older installed SDKs
15
+ from synth_ai.demos.demo_task_apps.core import DEFAULT_TASK_APP_SECRET_NAME
16
+ except Exception: # pragma: no cover - occurs on older deployments
17
+ DEFAULT_TASK_APP_SECRET_NAME = "hendrycks-math-task-app-secret"
10
18
 
11
19
  # Self-contained: no external problem bank installer required
12
20
 
@@ -17,7 +25,9 @@ _SYNTH_HOSTED = None
17
25
  try:
18
26
  probe = _HERE
19
27
  for _ in range(8):
20
- candidate = (probe / "backend/app/routes/clustered_training/dev/synth_envs_hosted").resolve()
28
+ candidate = (
29
+ probe / "backend/app/routes/clustered_training/dev/synth_envs_hosted"
30
+ ).resolve()
21
31
  if candidate.exists():
22
32
  _SYNTH_HOSTED = candidate
23
33
  break
@@ -42,8 +52,40 @@ if _SYNTH_HOSTED is not None:
42
52
 
43
53
  # No extra local dirs required; app is self-contained
44
54
 
55
+
56
+ def _build_inline_secret() -> Secret:
57
+ required = ("ENVIRONMENT_API_KEY",)
58
+ optional = ("SYNTH_API_KEY", "OPENAI_API_KEY")
59
+ payload: dict[str, str] = {}
60
+ missing: list[str] = []
61
+
62
+ for key in required:
63
+ value = (os.environ.get(key) or "").strip()
64
+ if not value:
65
+ missing.append(key)
66
+ else:
67
+ payload[key] = value
68
+
69
+ for key in optional:
70
+ value = (os.environ.get(key) or "").strip()
71
+ if value:
72
+ payload[key] = value
73
+
74
+ if missing:
75
+ raise RuntimeError(
76
+ "Missing required environment values for inline secret: " + ", ".join(missing)
77
+ )
78
+
79
+ previews = ", ".join(f"{k}:len={len(v)}" for k, v in payload.items())
80
+ print(f"[startup] TASK_APP_SECRET_NAME={DEFAULT_TASK_APP_SECRET_NAME}")
81
+ print(f"[startup] inline secret prepared ({previews})")
82
+
83
+ return Secret.from_dict(payload)
84
+
85
+
86
+ INLINE_SECRET = _build_inline_secret()
87
+
45
88
  app = App("hendrycks-math-task-app")
46
- _SECRET_NAME = "synth-math-demo-secret"
47
89
 
48
90
 
49
91
  @app.function(
@@ -52,38 +94,109 @@ _SECRET_NAME = "synth-math-demo-secret"
52
94
  memory=16384,
53
95
  cpu=4,
54
96
  min_containers=1,
55
- secrets=[Secret.from_name(_SECRET_NAME)],
97
+ secrets=[INLINE_SECRET],
56
98
  )
57
99
  @asgi_app()
58
100
  def fastapi_app():
59
101
  import httpx
60
- from fastapi import Body, HTTPException, status
61
- from fastapi import FastAPI, Request, Header
102
+ from fastapi import Body, FastAPI, HTTPException, status
62
103
  from fastapi.middleware.cors import CORSMiddleware
63
104
  from fastapi.responses import JSONResponse
64
105
 
106
+ try:
107
+ from synth_ai.task.auth import (
108
+ is_api_key_header_authorized,
109
+ normalize_environment_api_key,
110
+ )
111
+ except Exception: # pragma: no cover - fallback for older synth-ai builds
112
+
113
+ def _normalize_env_key_fallback() -> str | None:
114
+ key = os.getenv("ENVIRONMENT_API_KEY")
115
+ if key:
116
+ return key
117
+ for alias in ("dev_environment_api_key", "DEV_ENVIRONMENT_API_KEY"):
118
+ candidate = os.getenv(alias)
119
+ if candidate:
120
+ os.environ["ENVIRONMENT_API_KEY"] = candidate
121
+ return candidate
122
+ return None
123
+
124
+ def normalize_environment_api_key() -> str | None: # type: ignore[override]
125
+ return _normalize_env_key_fallback()
126
+
127
+ def _header_values(request: Request, header: str) -> Iterable[str]:
128
+ raw = request.headers.get(header) or request.headers.get(header.lower())
129
+ return [raw] if raw is not None else []
130
+
131
+ def _split(values: Iterable[str]) -> list[str]:
132
+ parts: list[str] = []
133
+ for value in values:
134
+ if not isinstance(value, str):
135
+ continue
136
+ for chunk in value.split(","):
137
+ chunk = chunk.strip()
138
+ if chunk:
139
+ parts.append(chunk)
140
+ return parts
141
+
142
+ def is_api_key_header_authorized(request: Request) -> bool: # type: ignore[override]
143
+ expected = normalize_environment_api_key()
144
+ if not expected:
145
+ return False
146
+ single = _header_values(request, "x-api-key")
147
+ multi = _header_values(request, "x-api-keys")
148
+ auth = _header_values(request, "authorization")
149
+ bearer = []
150
+ for token in auth:
151
+ if isinstance(token, str) and token.lower().startswith("bearer "):
152
+ bearer.append(token.split(" ", 1)[1].strip())
153
+ candidates = _split(single + multi + bearer)
154
+ return any(candidate == expected for candidate in candidates)
155
+
65
156
  # Inline, self-contained FastAPI app (math-only)
66
157
  @lru_cache(maxsize=1)
67
158
  def _hf_split(subject: str, split: str, slice_spec: str | None = None):
68
159
  from datasets import load_dataset # type: ignore
160
+
69
161
  s = split
70
162
  if slice_spec:
71
163
  s = f"{s}{slice_spec}"
72
- return load_dataset("nlile/hendrycks-MATH-benchmark", subject, split=s)
164
+
165
+ try:
166
+ return load_dataset("nlile/hendrycks-MATH-benchmark", subject, split=s)
167
+ except ValueError:
168
+ base = load_dataset("nlile/hendrycks-MATH-benchmark", split=s)
169
+ if subject and subject not in {"", "default"}:
170
+ if "subject" in base.column_names:
171
+ base = base.filter(lambda ex: ex.get("subject") == subject)
172
+ elif isinstance(base, list):
173
+ base = [ex for ex in base if ex.get("subject") == subject]
174
+ return base
73
175
 
74
176
  def _normalize_answer_text(s: str) -> str:
75
177
  import re as _re
178
+
76
179
  return _re.sub(r"[^0-9A-Za-z.+\-/*=]", "", (s or "").strip()).lower()
77
180
 
78
181
  def _extract_boxed(s: str) -> str:
79
182
  import re as _re
183
+
80
184
  m = list(_re.finditer(r"\\boxed\{([^}]+)\}", s or ""))
81
185
  return m[-1].group(1) if m else ""
82
186
 
83
187
  def _load_hendrycks_problem(seed: int, subject: str | None = None) -> tuple[str, str]:
84
188
  subj = subject or os.getenv("HENDRYCKS_MATH_CONFIG", "default")
85
- ds = _hf_split(subj, os.getenv("HENDRYCKS_MATH_SPLIT", "test"), os.getenv("HENDRYCKS_MATH_SLICE"))
189
+ ds = _hf_split(
190
+ subj, os.getenv("HENDRYCKS_MATH_SPLIT", "test"), os.getenv("HENDRYCKS_MATH_SLICE")
191
+ )
86
192
  n = len(ds) if hasattr(ds, "__len__") else 0
193
+ if n == 0 and subject not in {"", "default"}:
194
+ ds = _hf_split(
195
+ "default",
196
+ os.getenv("HENDRYCKS_MATH_SPLIT", "test"),
197
+ os.getenv("HENDRYCKS_MATH_SLICE"),
198
+ )
199
+ n = len(ds) if hasattr(ds, "__len__") else 0
87
200
  if n == 0:
88
201
  raise RuntimeError("Hendrycks MATH dataset loaded empty")
89
202
  idx = abs(int(seed)) % n
@@ -104,47 +217,154 @@ def fastapi_app():
104
217
  allow_headers=["*"],
105
218
  )
106
219
 
220
+ import logging
221
+
222
+ logger = logging.getLogger("hendrycks_math_task_app")
223
+ if not logger.handlers:
224
+ logger.addHandler(logging.StreamHandler())
225
+ logger.setLevel(logging.INFO)
226
+
227
+ def _log_env_key_prefix(source: str, env_key: str | None) -> str | None:
228
+ if not env_key:
229
+ return None
230
+ half = max(1, len(env_key) // 2)
231
+ prefix = env_key[:half]
232
+ msg = f"[{source}] expected ENVIRONMENT_API_KEY prefix: {prefix}"
233
+ print(msg)
234
+ logger.info(msg)
235
+ return prefix
236
+
237
+ def _resolve_env_keys() -> set[str]:
238
+ keys: set[str] = set()
239
+ for alias in (
240
+ "ENVIRONMENT_API_KEY",
241
+ "dev_environment_api_key",
242
+ "DEV_ENVIRONMENT_API_KEY",
243
+ ):
244
+ value = os.environ.get(alias)
245
+ if value:
246
+ os.environ.setdefault("ENVIRONMENT_API_KEY", value)
247
+ keys.add(value)
248
+ alias_env = os.environ.get("ENVIRONMENT_API_KEY_ALIASES", "")
249
+ for chunk in alias_env.split(","):
250
+ trimmed = chunk.strip()
251
+ if trimmed:
252
+ keys.add(trimmed)
253
+ return keys
254
+
255
+ def _extract_header_candidates(
256
+ request: Request,
257
+ x_api_key: str | None,
258
+ x_api_keys: str | None,
259
+ authorization: str | None,
260
+ ) -> list[str]:
261
+ headers = request.headers
262
+ candidates: list[str] = []
263
+ primary = x_api_key or headers.get("x-api-key")
264
+ if primary:
265
+ candidates.append(primary.strip())
266
+ secondary = x_api_keys or headers.get("x-api-keys")
267
+ if secondary:
268
+ candidates.extend(
269
+ [value.strip() for value in secondary.split(",") if value.strip()]
270
+ )
271
+ auth_header = (
272
+ authorization or headers.get("authorization") or headers.get("Authorization")
273
+ )
274
+ if auth_header and auth_header.lower().startswith("bearer "):
275
+ token = auth_header.split(" ", 1)[1].strip()
276
+ if token:
277
+ candidates.append(token)
278
+ return [c for c in candidates if c]
279
+
280
+ def _is_authorized(
281
+ request: Request,
282
+ x_api_key: str | None,
283
+ x_api_keys: str | None,
284
+ authorization: str | None,
285
+ ) -> bool:
286
+ keys = _resolve_env_keys()
287
+ if not keys:
288
+ return False
289
+ candidates = _extract_header_candidates(request, x_api_key, x_api_keys, authorization)
290
+ return any(candidate in keys for candidate in candidates)
291
+
107
292
  @app.get("/info")
108
293
  async def info():
109
294
  return {
110
295
  "service": {"base_url": os.getenv("SERVICE_BASE_URL", "")},
111
- "inference": {"base_url": "", "endpoints": {"chat_completions": "/v1/chat/completions"}},
296
+ "inference": {
297
+ "base_url": "",
298
+ "endpoints": {"chat_completions": "/v1/chat/completions"},
299
+ },
112
300
  }
113
301
 
114
302
  @app.get("/health")
115
- async def health(x_api_key: str | None = Header(default=None, alias="X-API-Key")):
116
- env_key = os.environ.get("ENVIRONMENT_API_KEY")
303
+ async def health(request: Request):
304
+ env_keys = _resolve_env_keys()
305
+ env_key = next(iter(env_keys), None)
117
306
  if not env_key:
118
- return JSONResponse(status_code=503, content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"})
119
- if x_api_key is not None and x_api_key != env_key:
120
- return JSONResponse(status_code=401, content={"status": "unauthorized", "detail": "Invalid API key"})
121
- return {"status": "healthy"}
307
+ return JSONResponse(
308
+ status_code=503,
309
+ content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
310
+ )
311
+ # Authorize using all header variants; avoid typed Header params to prevent 422s
312
+ authorized = is_api_key_header_authorized(request)
313
+ if not authorized:
314
+ prefix = _log_env_key_prefix("health", env_key)
315
+ content = {
316
+ "status": "healthy",
317
+ "authorized": False,
318
+ }
319
+ if prefix:
320
+ content["expected_api_key_prefix"] = prefix
321
+ return JSONResponse(status_code=200, content=content)
322
+ return {"status": "healthy", "authorized": True}
122
323
 
123
324
  # Optional rollout-specific health for CLI compatibility
124
325
  @app.get("/health/rollout")
125
- async def health_rollout(x_api_key: str | None = Header(default=None, alias="X-API-Key")):
126
- env_key = os.environ.get("ENVIRONMENT_API_KEY")
326
+ async def health_rollout(request: Request):
327
+ env_keys = _resolve_env_keys()
328
+ env_key = next(iter(env_keys), None)
127
329
  if not env_key:
128
- return JSONResponse(status_code=503, content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"})
129
- if not x_api_key or x_api_key != env_key:
130
- return JSONResponse(status_code=401, content={"status": "unauthorized", "detail": "Invalid or missing API key"})
131
- return {"ok": True}
330
+ return JSONResponse(
331
+ status_code=503,
332
+ content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
333
+ )
334
+ authorized = is_api_key_header_authorized(request)
335
+ if not authorized:
336
+ prefix = _log_env_key_prefix("health/rollout", env_key)
337
+ content = {
338
+ "status": "healthy",
339
+ "authorized": False,
340
+ }
341
+ if prefix:
342
+ content["expected_api_key_prefix"] = prefix
343
+ return JSONResponse(status_code=200, content=content)
344
+ return {"ok": True, "authorized": True}
132
345
 
133
346
  # _load_hendrycks_problem is defined at fastapi_app scope
134
347
 
135
348
  @app.get("/task_info")
136
- async def task_info(seed: int = 0, subject: str = "algebra"):
349
+ async def task_info(seed: int = 0, subject: str = "default"):
137
350
  """Return Hendrycks MATH problem/answer and tool schema for a seed."""
138
351
  q, a = _load_hendrycks_problem(int(seed), subject=subject)
139
- tools = [{
140
- "name": "interact",
141
- "description": "Submit one or more actions to the math environment.",
142
- "parameters": {
143
- "type": "object",
144
- "properties": {"actions": {"type": "array", "items": {"type": "string"}}},
145
- "required": ["actions"],
146
- },
147
- }]
352
+ tools = [
353
+ {
354
+ "name": "submit_answer",
355
+ "description": "Provide the final numerical or algebraic answer for the current math problem.",
356
+ "parameters": {
357
+ "type": "object",
358
+ "properties": {
359
+ "answer": {
360
+ "type": "string",
361
+ "description": "The proposed final answer",
362
+ },
363
+ },
364
+ "required": ["answer"],
365
+ },
366
+ }
367
+ ]
148
368
  return {
149
369
  "seed": int(seed),
150
370
  "subject": subject,
@@ -159,6 +379,27 @@ def fastapi_app():
159
379
 
160
380
  api = create_app()
161
381
 
382
+ # Always log and surface 422 validation errors with header presence snapshot
383
+ from fastapi.exceptions import RequestValidationError
384
+
385
+ @api.exception_handler(RequestValidationError)
386
+ async def _on_validation_error(request: Request, exc: RequestValidationError):
387
+ try:
388
+ hdr = request.headers
389
+ snapshot = {
390
+ "path": str(request.url.path),
391
+ "have_x_api_key": bool(hdr.get("x-api-key")),
392
+ "have_x_api_keys": bool(hdr.get("x-api-keys")),
393
+ "have_authorization": bool(hdr.get("authorization")),
394
+ "errors": exc.errors()[:5],
395
+ }
396
+ print("[422] validation", snapshot, flush=True)
397
+ except Exception:
398
+ pass
399
+ return JSONResponse(
400
+ status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]}
401
+ )
402
+
162
403
  @api.get("/")
163
404
  async def root_probe():
164
405
  return {"status": "ok", "service": "math"}
@@ -170,40 +411,51 @@ def fastapi_app():
170
411
  env_key = (
171
412
  os.environ.get("ENVIRONMENT_API_KEY")
172
413
  or os.environ.get("DEV_ENVIRONMENT_API_KEY")
173
- or os.environ.get("dev_environment_api_key")
414
+ or os.environ.get("DEV_ENVIRONMENT_API_KEY")
174
415
  )
175
416
  if not env_key:
176
417
  raise RuntimeError("ENVIRONMENT_API_KEY missing in task app environment")
177
418
 
178
- OPENAI_REMOVE_FIELDS = ("stop_after_tool_calls", "thinking_mode", "thinking_budget", "reasoning")
179
- OPENAI_REMOVE_SAMPLING_FIELDS = ("temperature", "top_p")
180
- TOOL_CHOICE_FORCE = {"type": "function", "function": {"name": "interact_many"}}
419
+ openai_remove_fields = (
420
+ "stop_after_tool_calls",
421
+ "thinking_mode",
422
+ "thinking_budget",
423
+ "reasoning",
424
+ )
425
+ openai_remove_sampling_fields = ("temperature", "top_p")
426
+ tool_choice_force = {"type": "function", "function": {"name": "submit_answer"}}
181
427
 
182
428
  def _prepare_openai_payload(model: str | None, payload: dict[str, object]) -> dict[str, object]:
183
429
  sanitized = dict(payload)
184
- for key in OPENAI_REMOVE_FIELDS:
430
+ for key in openai_remove_fields:
185
431
  sanitized.pop(key, None)
186
432
  if model and "gpt-5" in model:
187
433
  if "max_tokens" in sanitized and "max_completion_tokens" not in sanitized:
188
434
  sanitized["max_completion_tokens"] = sanitized.pop("max_tokens")
189
435
  else:
190
436
  sanitized.pop("max_tokens", None)
191
- for field in OPENAI_REMOVE_SAMPLING_FIELDS:
437
+ for field in openai_remove_sampling_fields:
192
438
  sanitized.pop(field, None)
193
- sanitized["tool_choice"] = TOOL_CHOICE_FORCE
194
- sanitized["parallel_tool_calls"] = False
195
- return sanitized
439
+ sanitized["tool_choice"] = tool_choice_force
440
+ sanitized["parallel_tool_calls"] = False
441
+ return sanitized
196
442
 
197
443
  @api.post("/proxy/v1/chat/completions")
198
444
  def proxy_chat_completions(request: dict[str, object] = Body(...)):
199
445
  key = os.environ.get("OPENAI_API_KEY")
200
446
  if not key:
201
- raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="OPENAI_API_KEY missing")
447
+ raise HTTPException(
448
+ status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="OPENAI_API_KEY missing"
449
+ )
202
450
  model = request.get("model") if isinstance(request, dict) else None
203
- payload = _prepare_openai_payload(model if isinstance(model, str) else None, request if isinstance(request, dict) else {})
451
+ payload = _prepare_openai_payload(
452
+ model if isinstance(model, str) else None, request if isinstance(request, dict) else {}
453
+ )
204
454
  headers = {"Authorization": f"Bearer {key}"}
205
455
  with httpx.Client(timeout=httpx.Timeout(180.0), follow_redirects=True) as client:
206
- resp = client.post("https://api.openai.com/v1/chat/completions", json=payload, headers=headers)
456
+ resp = client.post(
457
+ "https://api.openai.com/v1/chat/completions", json=payload, headers=headers
458
+ )
207
459
  try:
208
460
  data = resp.json()
209
461
  except Exception:
@@ -217,8 +469,8 @@ def fastapi_app():
217
469
  # Minimal math rollout endpoint: alternates agent/env; calls inference_url chat/completions
218
470
  @api.post("/rollout")
219
471
  def rollout(request: dict[str, object] = Body(...)):
220
- from typing import Any
221
472
  import json as _json
473
+ from typing import Any
222
474
 
223
475
  run_id = str(request.get("run_id"))
224
476
  data = request if isinstance(request, dict) else {}
@@ -236,15 +488,25 @@ def fastapi_app():
236
488
  env_cfg = (env or {}).get("config") or {}
237
489
  # Prefer env.seed; fall back to env.config.seed -> default 0
238
490
  try:
239
- seed_val = int((env or {}).get("seed")) if isinstance(env, dict) and (env or {}).get("seed") is not None else 0
491
+ seed_val = (
492
+ int((env or {}).get("seed"))
493
+ if isinstance(env, dict) and (env or {}).get("seed") is not None
494
+ else 0
495
+ )
240
496
  except Exception:
241
497
  seed_val = 0
242
498
  if seed_val == 0:
243
499
  try:
244
- seed_val = int(env_cfg.get("seed")) if isinstance(env_cfg, dict) and env_cfg.get("seed") is not None else 0
500
+ seed_val = (
501
+ int(env_cfg.get("seed"))
502
+ if isinstance(env_cfg, dict) and env_cfg.get("seed") is not None
503
+ else 0
504
+ )
245
505
  except Exception:
246
506
  seed_val = 0
247
- subject = (env_cfg.get("subject") if isinstance(env_cfg, dict) else None) or os.getenv("HENDRYCKS_MATH_CONFIG", "default")
507
+ subject = (env_cfg.get("subject") if isinstance(env_cfg, dict) else None) or os.getenv(
508
+ "HENDRYCKS_MATH_CONFIG", "default"
509
+ )
248
510
  # Load real Hendrycks problem text/solution (download if necessary). Crash on failure.
249
511
  qh, ah = _load_hendrycks_problem(seed_val, subject=subject)
250
512
  question = qh
@@ -262,11 +524,14 @@ def fastapi_app():
262
524
  sanitized.pop("max_tokens", None)
263
525
  for field in ("temperature", "top_p"):
264
526
  sanitized.pop(field, None)
265
- sanitized["tool_choice"] = {"type": "function", "function": {"name": "interact"}}
527
+ sanitized["tool_choice"] = {
528
+ "type": "function",
529
+ "function": {"name": "submit_answer"},
530
+ }
266
531
  sanitized["parallel_tool_calls"] = False
267
532
  return sanitized
268
533
 
269
- def _parse_tool_actions(resp: dict[str, Any]) -> list[str]:
534
+ def _parse_tool_answer(resp: dict[str, Any]) -> str:
270
535
  try:
271
536
  choices = resp.get("choices")
272
537
  if isinstance(choices, list) and choices:
@@ -275,7 +540,7 @@ def fastapi_app():
275
540
  if isinstance(tcs, list) and tcs:
276
541
  fn = tcs[0].get("function", {}) if isinstance(tcs[0], dict) else {}
277
542
  args = fn.get("arguments")
278
- obj = {}
543
+ obj: dict[str, Any] = {}
279
544
  if isinstance(args, str):
280
545
  try:
281
546
  obj = _json.loads(args)
@@ -283,133 +548,193 @@ def fastapi_app():
283
548
  obj = {}
284
549
  elif isinstance(args, dict):
285
550
  obj = args
286
- acts = obj.get("actions")
287
- if isinstance(acts, list):
288
- return [str(a) for a in acts][:5]
551
+ ans = obj.get("answer")
552
+ if isinstance(ans, str):
553
+ return ans.strip()
289
554
  except Exception:
290
555
  pass
291
- return []
556
+ return ""
292
557
 
293
- # Build minimal context and execute ops
558
+ # Single-step rollout: one agent call followed by evaluation of the returned tool answer
294
559
  history: list[dict[str, Any]] = []
295
560
  steps: list[dict[str, Any]] = []
296
561
  total_reward = 0.0
297
- last_llm_text: str | None = None
298
- last_actions: list[str] = []
299
- for op in ops or []:
300
- if op == "agent":
301
- user_prompt = (
302
- str(question)
303
- if isinstance(question, (str, int, float)) and str(question).strip()
304
- else "Solve the problem. Provide answer steps succinctly."
305
- )
306
- payload = {
307
- "model": model,
308
- "messages": [{"role": "user", "content": user_prompt}],
309
- "tools": [{
310
- "type": "function",
311
- "function": {"name": "interact", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}}}, "required": ["actions"]}},
312
- }],
313
- "max_tokens": 256,
314
- "temperature": 0.2,
562
+
563
+ user_prompt = (
564
+ str(question)
565
+ if isinstance(question, str | int | float) and str(question).strip()
566
+ else "Solve the problem. Provide answer steps succinctly."
567
+ )
568
+ payload = {
569
+ "model": model,
570
+ "messages": [{"role": "user", "content": user_prompt}],
571
+ "tools": [
572
+ {
573
+ "type": "function",
574
+ "function": {
575
+ "name": "submit_answer",
576
+ "parameters": {
577
+ "type": "object",
578
+ "properties": {
579
+ "answer": {"type": "string"},
580
+ },
581
+ "required": ["answer"],
582
+ },
583
+ },
315
584
  }
316
- to_send = _prepare_payload(model if isinstance(model, str) else None, payload)
317
- # Print prompts and tools exposed to the model
318
- try:
319
- tool_names = []
320
- for t in (payload.get("tools") or []):
321
- if isinstance(t, dict):
322
- fn = (t.get("function") or {}) if isinstance(t.get("function"), dict) else {}
323
- name = fn.get("name")
324
- if isinstance(name, str):
325
- tool_names.append(name)
326
- print(f"[math] system: <none>", flush=True)
327
- print(f"[math] user: {user_prompt}", flush=True)
328
- print(f"[math] tools: {tool_names}", flush=True)
329
- except Exception:
330
- pass
331
- headers = {}
332
- if "/proxy" in inference_url:
333
- sk = os.environ.get("SYNTH_API_KEY")
334
- if sk:
335
- headers["Authorization"] = f"Bearer {sk}"
336
- with httpx.Client(timeout=httpx.Timeout(180.0), follow_redirects=True) as client:
337
- resp = client.post(f"{inference_url}/v1/chat/completions", json=to_send, headers=headers)
338
- try:
339
- data = resp.json()
340
- except Exception:
341
- data = {"error": "invalid_json", "raw": resp.text[:400]}
342
-
343
- # Extract assistant text for visibility/correctness
344
- llm_text = None
345
- try:
346
- _choices = data.get("choices") if isinstance(data, dict) else None
347
- if isinstance(_choices, list) and _choices:
348
- _msg = _choices[0].get("message", {}) if isinstance(_choices[0], dict) else {}
349
- if isinstance(_msg, dict):
350
- _content = _msg.get("content")
351
- if isinstance(_content, str) and _content.strip():
352
- llm_text = _content
353
- except Exception:
354
- llm_text = None
355
-
356
- # Print question, model output, and correctness if we have an expected answer
357
- try:
358
- if question is not None:
359
- print(f"[math] question: {question}", flush=True)
360
- if llm_text is not None:
361
- print(f"[math] llm: {llm_text}", flush=True)
362
- if expected_answer is not None and llm_text is not None:
363
- exp = str(expected_answer).strip()
364
- got = llm_text.strip()
365
- is_correct = exp and (exp in got)
366
- print(f"[math] correct: {bool(is_correct)} (expected fragment: {exp})", flush=True)
367
- except Exception:
368
- pass
369
- last_llm_text = llm_text
370
- acts = _parse_tool_actions(data) or []
371
- last_actions = acts if isinstance(acts, list) else []
372
- steps.append({"obs": {}, "tool_calls": [{"tool_name": "interact", "arguments": _json.dumps({"actions": acts})}], "reward": None, "done": False, "truncated": False, "info": None})
373
- history.append({"actions": acts})
374
- elif op == "env":
375
- # Compute a simple correctness-based reward if expected answer available
376
- reward_val = 0.0
377
- try:
378
- if expected_answer is not None:
379
- # Prefer explicit tool-call answer from last_actions
380
- candidate = ""
381
- if isinstance(last_actions, list) and last_actions:
382
- # Take the last non-empty action as the final answer
383
- for s in reversed(last_actions):
384
- if isinstance(s, str) and s.strip():
385
- candidate = s.strip()
386
- break
387
- # Fallback to parse from llm_text if tool actions absent
388
- if not candidate and last_llm_text is not None:
389
- candidate = _extract_boxed(last_llm_text) or last_llm_text
390
- exp_raw = _extract_boxed(str(expected_answer)) or str(expected_answer)
391
- got_raw = candidate
392
- exp_n = _normalize_answer_text(exp_raw)
393
- got_n = _normalize_answer_text(got_raw)
394
- if exp_n and exp_n in got_n:
395
- reward_val = 1.0
396
- except Exception:
397
- reward_val = 0.0
398
- steps.append({"obs": {}, "tool_calls": [], "reward": reward_val, "done": False, "truncated": False, "info": None})
399
- total_reward += float(reward_val)
400
- else:
401
- continue
585
+ ],
586
+ "max_tokens": 256,
587
+ "temperature": 0.2,
588
+ }
589
+ to_send = _prepare_payload(model if isinstance(model, str) else None, payload)
590
+
591
+ try:
592
+ tool_names = []
593
+ for t in payload.get("tools") or []:
594
+ if isinstance(t, dict):
595
+ fn = (t.get("function") or {}) if isinstance(t.get("function"), dict) else {}
596
+ name = fn.get("name")
597
+ if isinstance(name, str):
598
+ tool_names.append(name)
599
+ print("[math] system: <none>", flush=True)
600
+ print(f"[math] user: {user_prompt}", flush=True)
601
+ print(f"[math] tools: {tool_names}", flush=True)
602
+ except Exception:
603
+ pass
604
+
605
+ headers = {}
606
+ if "/proxy" in inference_url:
607
+ sk = os.environ.get("SYNTH_API_KEY")
608
+ if sk:
609
+ headers["Authorization"] = f"Bearer {sk}"
610
+ with httpx.Client(timeout=httpx.Timeout(180.0), follow_redirects=True) as client:
611
+ resp = client.post(
612
+ f"{inference_url}/v1/chat/completions", json=to_send, headers=headers
613
+ )
614
+ try:
615
+ data = resp.json()
616
+ except Exception:
617
+ data = {"error": "invalid_json", "raw": resp.text[:400]}
618
+
619
+ llm_text = None
620
+ try:
621
+ _choices = data.get("choices") if isinstance(data, dict) else None
622
+ if isinstance(_choices, list) and _choices:
623
+ _msg = _choices[0].get("message", {}) if isinstance(_choices[0], dict) else {}
624
+ if isinstance(_msg, dict):
625
+ _content = _msg.get("content")
626
+ if isinstance(_content, str) and _content.strip():
627
+ llm_text = _content
628
+ except Exception:
629
+ llm_text = None
630
+
631
+ try:
632
+ if question is not None:
633
+ print(f"[math] question: {question}", flush=True)
634
+ if llm_text is not None:
635
+ print(f"[math] llm: {llm_text}", flush=True)
636
+ if expected_answer is not None and llm_text is not None:
637
+ exp = str(expected_answer).strip()
638
+ got = llm_text.strip()
639
+ is_correct = exp and (exp in got)
640
+ print(f"[math] correct: {bool(is_correct)} (expected fragment: {exp})", flush=True)
641
+ except Exception:
642
+ pass
643
+
644
+ tool_answer = _parse_tool_answer(data)
645
+ history.append({"answer": tool_answer})
646
+ steps.append(
647
+ {
648
+ "obs": {},
649
+ "tool_calls": [
650
+ {
651
+ "tool_name": "submit_answer",
652
+ "arguments": _json.dumps({"answer": tool_answer}),
653
+ }
654
+ ],
655
+ "reward": None,
656
+ "done": False,
657
+ "truncated": False,
658
+ "info": None,
659
+ }
660
+ )
661
+
662
+ # Evaluate answer correctness using tool output (or fall back to assistant text)
663
+ reward_val = 0.0
664
+ candidate = tool_answer or ""
665
+ try:
666
+ if not candidate and llm_text is not None:
667
+ candidate = _extract_boxed(llm_text) or llm_text
668
+ if expected_answer is not None:
669
+ exp_raw = _extract_boxed(str(expected_answer)) or str(expected_answer)
670
+ got_raw = candidate
671
+ exp_n = _normalize_answer_text(exp_raw)
672
+ got_n = _normalize_answer_text(got_raw)
673
+ if exp_n and exp_n in got_n:
674
+ reward_val = 1.0
675
+ except Exception:
676
+ reward_val = 0.0
677
+
678
+ # Immediate, concise rollout logging mirroring RL format
679
+ try:
680
+ preview = tool_answer[:120] + (
681
+ "…" if isinstance(tool_answer, str) and len(tool_answer) > 120 else ""
682
+ )
683
+ components = {
684
+ "env": float(reward_val),
685
+ "rubric_event": 1.0 if bool(tool_answer.strip()) else 0.0,
686
+ "rubric_outcome": 1.0 if float(reward_val) > 0.0 else 0.0,
687
+ }
688
+ print(
689
+ "[MATH_ROLLOUT] run=",
690
+ run_id,
691
+ " seed=",
692
+ seed_val,
693
+ " subject=",
694
+ subject,
695
+ " tool=submit_answer answer=",
696
+ preview,
697
+ " reward=",
698
+ float(reward_val),
699
+ " components=",
700
+ components,
701
+ flush=True,
702
+ )
703
+ except Exception:
704
+ pass
705
+
706
+ total_reward += float(reward_val)
707
+ steps.append(
708
+ {
709
+ "obs": {},
710
+ "tool_calls": [],
711
+ "reward": reward_val,
712
+ "done": True,
713
+ "truncated": False,
714
+ "info": None,
715
+ }
716
+ )
402
717
 
403
- # Compose response similar to SDK contract (simplified)
404
718
  return {
405
719
  "run_id": run_id,
406
- "trajectories": [{"env_id": env_name, "policy_id": (policy or {}).get("policy_name") or "math-react", "steps": steps, "final": {"observation": {}}, "length": len(steps)}],
720
+ "trajectories": [
721
+ {
722
+ "env_id": env_name,
723
+ "policy_id": (policy or {}).get("policy_name") or "math-react",
724
+ "steps": steps,
725
+ "final": {"observation": {}},
726
+ "length": len(steps),
727
+ }
728
+ ],
407
729
  "branches": {},
408
- "metrics": {"episode_returns": [total_reward], "mean_return": float(total_reward), "num_steps": len(steps), "num_episodes": 1},
730
+ "metrics": {
731
+ "episode_returns": [total_reward],
732
+ "mean_return": float(total_reward),
733
+ "num_steps": len(steps),
734
+ "num_episodes": 1,
735
+ },
409
736
  "aborted": False,
410
737
  "ops_executed": len(steps),
411
738
  }
412
739
 
413
740
  return api
414
-
415
-