synth-ai 0.2.9.dev0__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (890)
  1. examples/README.md +1 -0
  2. examples/__init__.py +16 -0
  3. examples/analyze_semantic_words.sh +17 -0
  4. examples/baseline/banking77_baseline.py +243 -0
  5. examples/baseline/banking77_pipeline_baseline.py +294 -0
  6. examples/baseline/crafter_baseline.py +407 -0
  7. examples/baseline/pokemon_red_baseline.py +326 -0
  8. examples/baseline/simple_baseline.py +56 -0
  9. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  10. examples/blog_posts/gepa/README.md +355 -0
  11. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  12. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
  13. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
  14. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
  15. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
  16. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
  17. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
  18. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
  19. examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
  20. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
  21. examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
  22. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
  23. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
  24. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
  25. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
  26. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
  27. examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
  28. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  29. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  30. examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
  31. examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
  32. examples/blog_posts/gepa/task_apps.py +105 -0
  33. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  34. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  35. examples/blog_posts/mipro/README.md +415 -0
  36. examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
  37. examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
  38. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
  39. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
  40. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
  41. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
  42. examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
  43. examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
  44. examples/blog_posts/mipro/multi_step.md +79 -0
  45. examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
  46. examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
  47. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
  48. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
  49. examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
  50. examples/blog_posts/pokemon_vl/README.md +98 -0
  51. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  52. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  53. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  54. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  55. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  56. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  57. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  58. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  59. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  60. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  61. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  62. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  63. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  64. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  65. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  66. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  67. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  68. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  69. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  70. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  71. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  72. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  73. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  74. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  75. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  76. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  77. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  78. examples/crafter_debug_render.py +186 -0
  79. examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
  80. examples/gepa/banking77_pipeline_gepa.toml +96 -0
  81. examples/gepa/multi_stage_gepa_example.toml +84 -0
  82. examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
  83. examples/multi_step/SFT_README.md +147 -0
  84. examples/multi_step/configs/README_verilog_rl.md +77 -0
  85. examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
  86. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
  87. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  88. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  89. examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
  90. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
  91. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
  92. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
  93. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  94. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  95. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  96. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  97. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  98. examples/multi_step/configs/verilog_rl_lora.toml +147 -0
  99. examples/multi_step/convert_traces_to_sft.py +84 -0
  100. examples/multi_step/crafter_rl_lora.md +70 -0
  101. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  102. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  103. examples/multi_step/readme.md +48 -0
  104. examples/multi_step/run_sft_qwen30b.sh +45 -0
  105. examples/multi_step/sse_metrics_streaming_notes.md +357 -0
  106. examples/multi_step/task_app_config_notes.md +494 -0
  107. examples/multi_step/verilog_rl_lora.md +218 -0
  108. examples/qwen_coder/README.md +102 -0
  109. examples/qwen_coder/_shared.py +113 -0
  110. examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
  111. examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
  112. examples/qwen_coder/configs/coder_lora_small.toml +57 -0
  113. examples/qwen_coder/generate_dataset.py +98 -0
  114. examples/qwen_coder/infer_ft_smoke.py +65 -0
  115. examples/qwen_coder/infer_prod_proxy.py +73 -0
  116. examples/qwen_coder/infer_via_synth.py +87 -0
  117. examples/qwen_coder/scripts/infer_coder.sh +19 -0
  118. examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
  119. examples/qwen_coder/sft_full_17b.py +103 -0
  120. examples/qwen_coder/sft_lora_30b.py +110 -0
  121. examples/qwen_coder/subset_jsonl.py +39 -0
  122. examples/qwen_coder/todos.md +38 -0
  123. examples/qwen_coder/validate_jsonl.py +60 -0
  124. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  125. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  126. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  127. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  128. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  129. examples/qwen_vl/QUICKSTART.md +327 -0
  130. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  131. examples/qwen_vl/README.md +152 -0
  132. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  133. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  134. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  135. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  136. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  137. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  138. examples/qwen_vl/__init__.py +2 -0
  139. examples/qwen_vl/collect_data_via_cli.md +415 -0
  140. examples/qwen_vl/collect_vision_traces.py +368 -0
  141. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  142. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  143. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  144. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  145. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  146. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  147. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  148. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  149. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  150. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  151. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  152. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  153. examples/qwen_vl/run_vision_comparison.sh +61 -0
  154. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  155. examples/qwen_vl/test_image_validation.py +201 -0
  156. examples/qwen_vl/test_sft_vision_data.py +110 -0
  157. examples/rl/README.md +169 -0
  158. examples/rl/configs/eval_base_qwen.toml +17 -0
  159. examples/rl/configs/eval_rl_qwen.toml +13 -0
  160. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  161. examples/rl/configs/rl_from_base_qwen17.toml +80 -0
  162. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  163. examples/rl/download_dataset.py +80 -0
  164. examples/rl/run_eval.py +436 -0
  165. examples/rl/run_rl_and_save.py +111 -0
  166. examples/rl/task_app/README.md +21 -0
  167. {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +188 -50
  168. examples/rl/task_app/math_task_app.py +111 -0
  169. examples/run_crafter_demo.sh +10 -0
  170. examples/sdk_prompt_learning_example.py +55 -0
  171. examples/sft/README.md +139 -0
  172. examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
  173. examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
  174. examples/sft/evaluate.py +117 -0
  175. examples/sft/export_dataset.py +120 -0
  176. examples/sft/generate_traces.py +164 -0
  177. examples/swe/__init__.py +12 -0
  178. examples/swe/task_app/README.md +135 -0
  179. examples/swe/task_app/__init__.py +2 -0
  180. examples/swe/task_app/grpo_swe_mini.py +604 -0
  181. examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
  182. examples/swe/task_app/hosted/README.md +173 -0
  183. examples/swe/task_app/hosted/__init__.py +5 -0
  184. examples/swe/task_app/hosted/branching.py +143 -0
  185. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  186. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  187. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  188. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  189. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  190. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  191. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  192. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  193. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  194. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  195. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
  196. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  197. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  198. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  199. examples/swe/task_app/hosted/hosted_app.py +204 -0
  200. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  201. examples/swe/task_app/hosted/inference/openai_client.py +584 -0
  202. examples/swe/task_app/hosted/main.py +100 -0
  203. examples/swe/task_app/hosted/policy_routes.py +1094 -0
  204. examples/swe/task_app/hosted/registry.py +195 -0
  205. examples/swe/task_app/hosted/rollout.py +1905 -0
  206. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  207. examples/swe/task_app/hosted/storage/volume.py +211 -0
  208. examples/swe/task_app/hosted/test_agents.py +161 -0
  209. examples/swe/task_app/hosted/test_service.py +136 -0
  210. examples/swe/task_app/hosted/utils.py +62 -0
  211. examples/swe/task_app/morph_backend.py +178 -0
  212. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  213. examples/task_apps/TESTING.md +275 -0
  214. examples/task_apps/banking77/__init__.py +6 -0
  215. examples/task_apps/banking77/banking77_task_app.py +912 -0
  216. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  217. examples/task_apps/banking77_pipeline/__init__.py +6 -0
  218. examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
  219. examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
  220. examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
  221. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  222. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
  223. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
  224. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  225. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  226. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  227. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  228. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  229. examples/task_apps/crafter/task_app/README.md +42 -0
  230. examples/task_apps/crafter/task_app/__init__.py +5 -0
  231. examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
  232. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
  233. examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
  234. examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
  235. examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
  236. examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  237. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  238. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  239. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  240. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
  241. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
  242. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
  243. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  244. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  245. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
  246. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  247. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
  248. examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
  249. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
  250. examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
  251. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
  252. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  253. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
  254. examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
  255. examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
  256. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
  257. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  258. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  259. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  260. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  261. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  262. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  263. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  264. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  265. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  266. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  267. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  268. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  269. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  270. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  271. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  272. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  273. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  274. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  275. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  276. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  277. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  278. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  279. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  280. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  281. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  282. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  283. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  284. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  285. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  286. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  287. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  288. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  289. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  290. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  291. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  292. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  293. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  294. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  295. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  296. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  297. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  298. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  299. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  300. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  301. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  302. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  303. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  304. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  305. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  306. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  307. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  308. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  309. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  310. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  311. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  312. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  313. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  314. examples/task_apps/enron/__init__.py +2 -0
  315. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  316. examples/task_apps/enron/filter_sft.toml +5 -0
  317. examples/task_apps/enron/task_app/README.md +14 -0
  318. examples/task_apps/enron/task_app/__init__.py +1 -0
  319. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  320. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  321. examples/task_apps/enron/tests/__init__.py +4 -0
  322. examples/task_apps/enron/tests/conftest.py +115 -0
  323. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  324. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  325. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  326. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  327. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  328. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  329. examples/task_apps/gepa_benchmarks/common.py +260 -0
  330. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  331. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  332. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  333. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  334. examples/task_apps/math/README.md +21 -0
  335. examples/task_apps/math/math_single_step.py +1000 -0
  336. examples/task_apps/math/math_task_app.py +115 -0
  337. examples/task_apps/pokemon_battle/__init__.py +2 -0
  338. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  339. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  340. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  341. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  342. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  343. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  344. examples/task_apps/pokemon_red/README.md +356 -0
  345. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
  346. examples/task_apps/pokemon_red/__init__.py +3 -0
  347. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
  348. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
  349. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  350. examples/task_apps/pokemon_red/task_app.py +1048 -0
  351. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  352. examples/task_apps/sokoban/README.md +306 -0
  353. examples/task_apps/sokoban/__init__.py +3 -0
  354. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  355. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  356. examples/task_apps/sokoban/filter_sft.toml +5 -0
  357. examples/task_apps/sokoban/task_app.py +1058 -0
  358. examples/task_apps/sokoban/tests/__init__.py +4 -0
  359. examples/task_apps/sokoban/tests/conftest.py +113 -0
  360. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  361. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  362. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  363. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  364. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  365. examples/task_apps/verilog/__init__.py +1 -0
  366. examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
  367. examples/task_apps/verilog/filter_sft.toml +5 -0
  368. examples/task_apps/verilog/task_app/README.md +12 -0
  369. examples/task_apps/verilog/task_app/__init__.py +1 -0
  370. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  371. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  372. examples/task_apps/verilog/tests/__init__.py +4 -0
  373. examples/task_apps/verilog/tests/conftest.py +115 -0
  374. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  375. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  376. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  377. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  378. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  379. examples/tunnel_gepa_banking77/README.md +106 -0
  380. examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
  381. examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
  382. examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
  383. examples/vlm/PROPOSAL.md +53 -0
  384. examples/vlm/README.md +68 -0
  385. examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
  386. examples/vlm/crafter_image_only_agent.py +207 -0
  387. examples/vlm/crafter_openai_vlm_agent.py +275 -0
  388. examples/vlm/filter_image_rows.py +63 -0
  389. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  390. examples/warming_up_to_rl/_utils.py +92 -0
  391. examples/warming_up_to_rl/analyze_trace_db.py +422 -0
  392. examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
  393. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
  394. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
  395. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
  396. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
  397. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
  398. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
  399. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
  400. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
  401. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
  402. examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
  403. examples/warming_up_to_rl/export_trace_sft.py +837 -0
  404. examples/warming_up_to_rl/groq_test.py +97 -0
  405. examples/warming_up_to_rl/manage_secrets.py +131 -0
  406. examples/warming_up_to_rl/old/event_rewards.md +234 -0
  407. examples/warming_up_to_rl/old/notes.md +73 -0
  408. examples/warming_up_to_rl/readme.md +110 -0
  409. examples/warming_up_to_rl/run_eval.py +736 -0
  410. examples/warming_up_to_rl/run_fft_and_save.py +380 -0
  411. examples/warming_up_to_rl/run_local_rollout.py +239 -0
  412. examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
  413. examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
  414. examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
  415. examples/warming_up_to_rl/run_rl_and_save.py +124 -0
  416. examples/warming_up_to_rl/run_rollout_remote.py +156 -0
  417. examples/warming_up_to_rl/task_app/README.md +42 -0
  418. examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
  419. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  420. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  421. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  422. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  423. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  424. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  425. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  426. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  427. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  428. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  429. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  430. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  431. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  432. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
  433. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  434. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
  435. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  436. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
  437. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  438. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
  439. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  440. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  441. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  442. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  443. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
  444. examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
  445. examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
  446. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
  447. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
  448. examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
  449. examples/workflows/math_rl/download_dataset.py +80 -0
  450. examples/workflows/math_rl/run_eval.py +436 -0
  451. examples/workflows/math_rl/run_rl_and_save.py +111 -0
  452. synth_ai/__init__.py +47 -23
  453. synth_ai/_utils/__init__.py +47 -0
  454. synth_ai/_utils/base_url.py +10 -0
  455. synth_ai/_utils/http.py +10 -0
  456. synth_ai/_utils/prompts.py +10 -0
  457. synth_ai/_utils/task_app_state.py +12 -0
  458. synth_ai/_utils/user_config.py +10 -0
  459. synth_ai/api/models/supported.py +514 -0
  460. synth_ai/api/train/__init__.py +60 -2
  461. synth_ai/api/train/builders.py +347 -39
  462. synth_ai/api/train/cli.py +895 -160
  463. synth_ai/api/train/config_finder.py +103 -25
  464. synth_ai/api/train/configs/__init__.py +65 -0
  465. synth_ai/api/train/configs/prompt_learning.py +496 -0
  466. synth_ai/api/train/configs/rl.py +188 -0
  467. synth_ai/api/train/configs/sft.py +99 -0
  468. synth_ai/api/train/configs/shared.py +81 -0
  469. synth_ai/api/train/env_resolver.py +70 -20
  470. synth_ai/api/train/pollers.py +29 -4
  471. synth_ai/api/train/prompt_learning.py +425 -0
  472. synth_ai/api/train/sft.py +390 -0
  473. synth_ai/api/train/supported_algos.py +147 -0
  474. synth_ai/api/train/task_app.py +6 -4
  475. synth_ai/api/train/utils.py +64 -52
  476. synth_ai/api/train/validators.py +1117 -0
  477. synth_ai/api/tunnel.py +49 -0
  478. synth_ai/auth/credentials.py +94 -0
  479. synth_ai/baseline/__init__.py +25 -0
  480. synth_ai/baseline/config.py +209 -0
  481. synth_ai/baseline/discovery.py +214 -0
  482. synth_ai/baseline/execution.py +146 -0
  483. synth_ai/cfgs.py +227 -0
  484. synth_ai/cli/__init__.py +85 -63
  485. synth_ai/cli/_modal_wrapper.py +31 -0
  486. synth_ai/cli/_storage.py +20 -0
  487. synth_ai/cli/_typer_patch.py +47 -0
  488. synth_ai/cli/_validate_task_app.py +29 -0
  489. synth_ai/cli/balance.py +16 -4
  490. synth_ai/cli/calc.py +36 -21
  491. synth_ai/cli/claude.py +70 -0
  492. synth_ai/cli/codex.py +267 -0
  493. synth_ai/cli/commands/__init__.py +18 -0
  494. synth_ai/cli/commands/baseline/__init__.py +12 -0
  495. synth_ai/cli/commands/baseline/core.py +637 -0
  496. synth_ai/cli/commands/baseline/list.py +93 -0
  497. synth_ai/cli/commands/demo/__init__.py +6 -0
  498. synth_ai/cli/commands/demo/core.py +163 -0
  499. synth_ai/cli/commands/eval/__init__.py +19 -0
  500. synth_ai/cli/commands/eval/core.py +1112 -0
  501. synth_ai/cli/commands/eval/errors.py +81 -0
  502. synth_ai/cli/commands/eval/validation.py +133 -0
  503. synth_ai/cli/commands/filter/__init__.py +12 -0
  504. synth_ai/cli/commands/filter/core.py +424 -0
  505. synth_ai/cli/commands/filter/errors.py +55 -0
  506. synth_ai/cli/commands/filter/validation.py +77 -0
  507. synth_ai/cli/commands/help/__init__.py +185 -0
  508. synth_ai/cli/commands/help/core.py +72 -0
  509. synth_ai/cli/commands/smoke/__init__.py +7 -0
  510. synth_ai/cli/commands/smoke/core.py +1437 -0
  511. synth_ai/cli/commands/status/__init__.py +66 -0
  512. synth_ai/cli/commands/status/client.py +192 -0
  513. synth_ai/cli/commands/status/config.py +92 -0
  514. synth_ai/cli/commands/status/errors.py +20 -0
  515. synth_ai/cli/commands/status/formatters.py +164 -0
  516. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  517. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  518. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  519. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  520. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  521. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  522. synth_ai/cli/commands/status/subcommands/session.py +183 -0
  523. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  524. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  525. synth_ai/cli/commands/status/utils.py +114 -0
  526. synth_ai/cli/commands/train/__init__.py +53 -0
  527. synth_ai/cli/commands/train/core.py +21 -0
  528. synth_ai/cli/commands/train/errors.py +117 -0
  529. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  530. synth_ai/cli/commands/train/judge_validation.py +305 -0
  531. synth_ai/cli/commands/train/validation.py +386 -0
  532. synth_ai/cli/demo.py +32 -140
  533. synth_ai/cli/deploy.py +233 -0
  534. synth_ai/cli/eval/__init__.py +36 -0
  535. synth_ai/cli/eval/core.py +5 -0
  536. synth_ai/cli/eval/errors.py +31 -0
  537. synth_ai/cli/eval/validation.py +5 -0
  538. synth_ai/cli/filter/__init__.py +28 -0
  539. synth_ai/cli/filter/core.py +5 -0
  540. synth_ai/cli/filter/errors.py +23 -0
  541. synth_ai/cli/filter/validation.py +5 -0
  542. synth_ai/cli/legacy_root_backup.py +28 -22
  543. synth_ai/cli/lib/__init__.py +10 -0
  544. synth_ai/cli/lib/task_app_discovery.py +7 -0
  545. synth_ai/cli/lib/task_app_env.py +518 -0
  546. synth_ai/cli/mcp.py +34 -0
  547. synth_ai/cli/modal_serve/__init__.py +12 -0
  548. synth_ai/cli/modal_serve/core.py +14 -0
  549. synth_ai/cli/modal_serve/errors.py +8 -0
  550. synth_ai/cli/modal_serve/validation.py +11 -0
  551. synth_ai/cli/opencode.py +256 -0
  552. synth_ai/cli/recent.py +13 -7
  553. synth_ai/cli/rl_demo.py +156 -116
  554. synth_ai/cli/root.py +131 -132
  555. synth_ai/cli/serve/__init__.py +12 -0
  556. synth_ai/cli/serve/core.py +14 -0
  557. synth_ai/cli/serve/errors.py +8 -0
  558. synth_ai/cli/serve/validation.py +11 -0
  559. synth_ai/cli/setup.py +49 -0
  560. synth_ai/cli/status.py +7 -125
  561. synth_ai/cli/task_app_deploy.py +7 -0
  562. synth_ai/cli/task_app_list.py +25 -0
  563. synth_ai/cli/task_app_modal_serve.py +11 -0
  564. synth_ai/cli/task_app_serve.py +11 -0
  565. synth_ai/cli/task_apps.py +2284 -257
  566. synth_ai/cli/traces.py +9 -5
  567. synth_ai/cli/train/__init__.py +12 -0
  568. synth_ai/cli/train/core.py +21 -0
  569. synth_ai/cli/train/errors.py +8 -0
  570. synth_ai/cli/train/validation.py +24 -0
  571. synth_ai/cli/train.py +5 -0
  572. synth_ai/cli/turso.py +73 -0
  573. synth_ai/cli/watch.py +13 -18
  574. synth_ai/demos/__init__.py +10 -0
  575. synth_ai/demos/core/__init__.py +28 -1
  576. synth_ai/demos/core/cli.py +579 -291
  577. synth_ai/demos/crafter/__init__.py +1 -0
  578. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  579. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  580. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  581. synth_ai/demos/demo_registry.py +176 -0
  582. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  583. synth_ai/demos/demo_task_apps/core.py +64 -28
  584. synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
  585. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
  586. synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
  587. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
  588. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  589. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  590. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  591. synth_ai/demos/demo_task_apps/math/modal_task_app.py +185 -83
  592. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
  593. synth_ai/demos/math/__init__.py +1 -0
  594. synth_ai/demos/math/_common.py +16 -0
  595. synth_ai/demos/math/app.py +38 -0
  596. synth_ai/demos/math/config.toml +76 -0
  597. synth_ai/demos/math/deploy_modal.py +54 -0
  598. synth_ai/demos/math/modal_task_app.py +703 -0
  599. synth_ai/demos/math/task_app_entry.py +51 -0
  600. synth_ai/environments/environment/core.py +7 -1
  601. synth_ai/environments/examples/bandit/engine.py +12 -5
  602. synth_ai/environments/examples/bandit/environment.py +0 -1
  603. synth_ai/environments/examples/bandit/taskset.py +4 -4
  604. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  605. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  606. synth_ai/environments/examples/crafter_classic/environment.py +93 -2
  607. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  608. synth_ai/environments/examples/enron/engine.py +7 -2
  609. synth_ai/environments/examples/enron/environment.py +68 -0
  610. synth_ai/environments/examples/red/engine.py +60 -12
  611. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  612. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  613. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  614. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  615. synth_ai/environments/examples/red/environment.py +86 -0
  616. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  617. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  618. synth_ai/environments/examples/verilog/engine.py +104 -12
  619. synth_ai/environments/examples/wordle/environment.py +0 -1
  620. synth_ai/environments/reproducibility/tree.py +5 -6
  621. synth_ai/environments/service/app.py +11 -12
  622. synth_ai/environments/service/core_routes.py +10 -9
  623. synth_ai/environments/stateful/engine.py +1 -1
  624. synth_ai/environments/tasks/core.py +1 -0
  625. synth_ai/environments/tasks/filters.py +5 -6
  626. synth_ai/environments/tasks/utils.py +4 -5
  627. synth_ai/evals/__init__.py +15 -0
  628. synth_ai/evals/base.py +14 -5
  629. synth_ai/evals/client.py +82 -0
  630. synth_ai/evals/types.py +42 -0
  631. synth_ai/http.py +8 -22
  632. synth_ai/http_client.py +45 -12
  633. synth_ai/inference/__init__.py +0 -2
  634. synth_ai/inference/client.py +21 -7
  635. synth_ai/jobs/client.py +129 -80
  636. synth_ai/judge_schemas.py +127 -0
  637. synth_ai/learning/__init__.py +51 -6
  638. synth_ai/learning/algorithms.py +14 -0
  639. synth_ai/learning/client.py +122 -30
  640. synth_ai/learning/config.py +2 -40
  641. synth_ai/learning/constants.py +0 -2
  642. synth_ai/learning/ft_client.py +4 -56
  643. synth_ai/learning/health.py +14 -8
  644. synth_ai/learning/jobs.py +43 -47
  645. synth_ai/learning/prompt_learning_client.py +276 -0
  646. synth_ai/learning/prompt_learning_types.py +185 -0
  647. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  648. synth_ai/learning/rl/client.py +269 -0
  649. synth_ai/learning/rl/config.py +31 -0
  650. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  651. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  652. synth_ai/learning/rl/secrets.py +13 -0
  653. synth_ai/learning/rl_client.py +2 -253
  654. synth_ai/learning/sft/__init__.py +29 -0
  655. synth_ai/learning/sft/client.py +68 -0
  656. synth_ai/learning/sft/config.py +270 -0
  657. synth_ai/learning/sft/data.py +698 -0
  658. synth_ai/learning/sse.py +25 -26
  659. synth_ai/learning/validators.py +29 -25
  660. synth_ai/mcp/__init__.py +5 -0
  661. synth_ai/mcp/__main__.py +8 -0
  662. synth_ai/mcp/main.py +254 -0
  663. synth_ai/mcp/setup.py +100 -0
  664. synth_ai/modal.py +257 -0
  665. synth_ai/pricing/__init__.py +3 -0
  666. synth_ai/pricing/model_pricing.py +64 -0
  667. synth_ai/session/__init__.py +75 -0
  668. synth_ai/session/client.py +383 -0
  669. synth_ai/session/constants.py +63 -0
  670. synth_ai/session/exceptions.py +105 -0
  671. synth_ai/session/manager.py +139 -0
  672. synth_ai/session/models.py +89 -0
  673. synth_ai/session/query.py +110 -0
  674. synth_ai/spec/__init__.py +46 -0
  675. synth_ai/spec/dataclasses.py +149 -0
  676. synth_ai/spec/loader.py +144 -0
  677. synth_ai/spec/serializer.py +199 -0
  678. synth_ai/spec/validation.py +250 -0
  679. synth_ai/streaming/__init__.py +29 -0
  680. synth_ai/streaming/config.py +94 -0
  681. synth_ai/streaming/handlers.py +589 -0
  682. synth_ai/streaming/streamer.py +320 -0
  683. synth_ai/streaming/types.py +95 -0
  684. synth_ai/task/__init__.py +50 -30
  685. synth_ai/task/apps/__init__.py +63 -19
  686. synth_ai/task/auth.py +35 -23
  687. synth_ai/task/client.py +15 -13
  688. synth_ai/task/config.py +261 -0
  689. synth_ai/task/contracts.py +165 -64
  690. synth_ai/task/datasets.py +9 -6
  691. synth_ai/task/errors.py +11 -10
  692. synth_ai/task/health.py +17 -11
  693. synth_ai/task/inference_api.py +101 -0
  694. synth_ai/task/json.py +58 -24
  695. synth_ai/task/proxy.py +59 -66
  696. synth_ai/task/rubrics/__init__.py +55 -0
  697. synth_ai/task/rubrics/loaders.py +156 -0
  698. synth_ai/task/rubrics/models.py +57 -0
  699. synth_ai/task/rubrics/scoring.py +116 -0
  700. synth_ai/task/rubrics/strict.py +149 -0
  701. synth_ai/task/rubrics.py +22 -15
  702. synth_ai/task/server.py +65 -31
  703. synth_ai/task/trace_correlation_helpers.py +328 -0
  704. synth_ai/task/tracing_utils.py +44 -28
  705. synth_ai/task/validators.py +449 -6
  706. synth_ai/task/vendors.py +5 -7
  707. synth_ai/tracing_v3/__init__.py +4 -0
  708. synth_ai/tracing_v3/abstractions.py +21 -4
  709. synth_ai/tracing_v3/config.py +167 -22
  710. synth_ai/tracing_v3/constants.py +21 -0
  711. synth_ai/tracing_v3/db_config.py +42 -29
  712. synth_ai/tracing_v3/decorators.py +80 -45
  713. synth_ai/tracing_v3/examples/basic_usage.py +15 -9
  714. synth_ai/tracing_v3/hooks.py +6 -4
  715. synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
  716. synth_ai/tracing_v3/migration_helper.py +1 -2
  717. synth_ai/tracing_v3/replica_sync.py +12 -7
  718. synth_ai/tracing_v3/serialization.py +130 -0
  719. synth_ai/tracing_v3/session_tracer.py +73 -16
  720. synth_ai/tracing_v3/storage/base.py +89 -1
  721. synth_ai/tracing_v3/storage/config.py +63 -16
  722. synth_ai/tracing_v3/storage/factory.py +11 -9
  723. synth_ai/tracing_v3/storage/utils.py +15 -11
  724. synth_ai/tracing_v3/trace_utils.py +317 -0
  725. synth_ai/tracing_v3/turso/__init__.py +8 -21
  726. synth_ai/tracing_v3/turso/daemon.py +123 -15
  727. synth_ai/tracing_v3/turso/models.py +5 -2
  728. synth_ai/tracing_v3/turso/native_manager.py +1293 -0
  729. synth_ai/tracing_v3/utils.py +5 -4
  730. synth_ai/tunnel.py +143 -0
  731. synth_ai/tunnel_deploy.py +278 -0
  732. synth_ai/types.py +8 -0
  733. synth_ai/urls.py +11 -0
  734. synth_ai/utils/__init__.py +166 -0
  735. synth_ai/utils/agents.py +74 -0
  736. synth_ai/utils/apps.py +152 -0
  737. synth_ai/utils/base_url.py +94 -0
  738. synth_ai/utils/bin.py +39 -0
  739. synth_ai/utils/claude.py +36 -0
  740. synth_ai/utils/cli.py +284 -0
  741. synth_ai/utils/config.py +81 -0
  742. synth_ai/utils/env.py +346 -0
  743. synth_ai/utils/errors.py +85 -0
  744. synth_ai/utils/http.py +172 -0
  745. synth_ai/utils/json.py +72 -0
  746. synth_ai/utils/log_filter.py +99 -0
  747. synth_ai/utils/logging.py +198 -0
  748. synth_ai/utils/modal.py +299 -0
  749. synth_ai/utils/paths.py +95 -0
  750. synth_ai/utils/process.py +233 -0
  751. synth_ai/utils/prompts.py +39 -0
  752. synth_ai/utils/sqld.py +122 -0
  753. synth_ai/utils/ssl.py +25 -0
  754. synth_ai/utils/task_app_discovery.py +882 -0
  755. synth_ai/utils/task_app_env.py +186 -0
  756. synth_ai/utils/task_app_state.py +318 -0
  757. synth_ai/utils/tunnel/__init__.py +12 -0
  758. synth_ai/utils/tunnel/config.py +55 -0
  759. synth_ai/utils/user_config.py +137 -0
  760. synth_ai/uvicorn.py +77 -0
  761. synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
  762. synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
  763. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
  764. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
  765. synth_ai/cli/man.py +0 -106
  766. synth_ai/core/experiment.py +0 -15
  767. synth_ai/core/system.py +0 -15
  768. synth_ai/demo_registry.py +0 -258
  769. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  770. synth_ai/experimental/synth_oss.py +0 -446
  771. synth_ai/handshake.py +0 -107
  772. synth_ai/install_sqld.sh +0 -40
  773. synth_ai/learning/offline/dpo.py +0 -0
  774. synth_ai/learning/offline/providers.py +0 -7
  775. synth_ai/learning/offline/sft.py +0 -0
  776. synth_ai/learning/offline/shared.py +0 -0
  777. synth_ai/learning/online/grpo.py +0 -0
  778. synth_ai/learning/online/irft.py +0 -0
  779. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  780. synth_ai/learning/prompts/gepa.py +0 -0
  781. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  782. synth_ai/learning/prompts/mipro.py +0 -289
  783. synth_ai/learning/prompts/random_search.py +0 -246
  784. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  785. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  786. synth_ai/lm/__init__.py +0 -51
  787. synth_ai/lm/caching/constants.py +0 -6
  788. synth_ai/lm/caching/dbs.py +0 -0
  789. synth_ai/lm/caching/ephemeral.py +0 -102
  790. synth_ai/lm/caching/handler.py +0 -137
  791. synth_ai/lm/caching/initialize.py +0 -11
  792. synth_ai/lm/caching/persistent.py +0 -114
  793. synth_ai/lm/config.py +0 -110
  794. synth_ai/lm/constants.py +0 -32
  795. synth_ai/lm/core/__init__.py +0 -8
  796. synth_ai/lm/core/all.py +0 -73
  797. synth_ai/lm/core/exceptions.py +0 -7
  798. synth_ai/lm/core/main.py +0 -319
  799. synth_ai/lm/core/main_v3.py +0 -594
  800. synth_ai/lm/core/synth_models.py +0 -48
  801. synth_ai/lm/core/vendor_clients.py +0 -188
  802. synth_ai/lm/cost/monitor.py +0 -1
  803. synth_ai/lm/cost/statefulness.py +0 -1
  804. synth_ai/lm/injection.py +0 -80
  805. synth_ai/lm/overrides.py +0 -206
  806. synth_ai/lm/provider_support/__init__.py +0 -8
  807. synth_ai/lm/provider_support/anthropic.py +0 -972
  808. synth_ai/lm/provider_support/openai.py +0 -1139
  809. synth_ai/lm/provider_support/suppress_logging.py +0 -31
  810. synth_ai/lm/structured_outputs/handler.py +0 -440
  811. synth_ai/lm/structured_outputs/inject.py +0 -297
  812. synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
  813. synth_ai/lm/tools/__init__.py +0 -3
  814. synth_ai/lm/tools/base.py +0 -172
  815. synth_ai/lm/unified_interface.py +0 -202
  816. synth_ai/lm/vendors/base.py +0 -81
  817. synth_ai/lm/vendors/core/anthropic_api.py +0 -387
  818. synth_ai/lm/vendors/core/gemini_api.py +0 -292
  819. synth_ai/lm/vendors/core/mistral_api.py +0 -322
  820. synth_ai/lm/vendors/core/openai_api.py +0 -225
  821. synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
  822. synth_ai/lm/vendors/local/ollama.py +0 -0
  823. synth_ai/lm/vendors/openai_standard.py +0 -780
  824. synth_ai/lm/vendors/openai_standard_responses.py +0 -256
  825. synth_ai/lm/vendors/retries.py +0 -22
  826. synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
  827. synth_ai/lm/vendors/supported/deepseek.py +0 -69
  828. synth_ai/lm/vendors/supported/grok.py +0 -75
  829. synth_ai/lm/vendors/supported/groq.py +0 -16
  830. synth_ai/lm/vendors/supported/ollama.py +0 -15
  831. synth_ai/lm/vendors/supported/openrouter.py +0 -74
  832. synth_ai/lm/vendors/supported/together.py +0 -11
  833. synth_ai/lm/vendors/synth_client.py +0 -808
  834. synth_ai/lm/warmup.py +0 -186
  835. synth_ai/rl/secrets.py +0 -19
  836. synth_ai/scripts/verify_rewards.py +0 -100
  837. synth_ai/task/apps/grpo_crafter.py +0 -438
  838. synth_ai/tracing/__init__.py +0 -30
  839. synth_ai/tracing_v1/__init__.py +0 -33
  840. synth_ai/tracing_v3/turso/manager.py +0 -774
  841. synth_ai/v0/tracing/abstractions.py +0 -224
  842. synth_ai/v0/tracing/base_client.py +0 -91
  843. synth_ai/v0/tracing/client_manager.py +0 -131
  844. synth_ai/v0/tracing/config.py +0 -142
  845. synth_ai/v0/tracing/context.py +0 -146
  846. synth_ai/v0/tracing/decorators.py +0 -682
  847. synth_ai/v0/tracing/events/__init__.py +0 -0
  848. synth_ai/v0/tracing/events/manage.py +0 -147
  849. synth_ai/v0/tracing/events/scope.py +0 -86
  850. synth_ai/v0/tracing/events/store.py +0 -228
  851. synth_ai/v0/tracing/immediate_client.py +0 -151
  852. synth_ai/v0/tracing/local.py +0 -18
  853. synth_ai/v0/tracing/log_client_base.py +0 -73
  854. synth_ai/v0/tracing/retry_queue.py +0 -186
  855. synth_ai/v0/tracing/trackers.py +0 -515
  856. synth_ai/v0/tracing/upload.py +0 -512
  857. synth_ai/v0/tracing/utils.py +0 -9
  858. synth_ai/v0/tracing_v1/__init__.py +0 -16
  859. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  860. synth_ai/v0/tracing_v1/base_client.py +0 -91
  861. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  862. synth_ai/v0/tracing_v1/config.py +0 -142
  863. synth_ai/v0/tracing_v1/context.py +0 -146
  864. synth_ai/v0/tracing_v1/decorators.py +0 -703
  865. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  866. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  867. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  868. synth_ai/v0/tracing_v1/events/store.py +0 -228
  869. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  870. synth_ai/v0/tracing_v1/local.py +0 -18
  871. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  872. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  873. synth_ai/v0/tracing_v1/trackers.py +0 -515
  874. synth_ai/v0/tracing_v1/upload.py +0 -527
  875. synth_ai/v0/tracing_v1/utils.py +0 -9
  876. synth_ai/zyk/__init__.py +0 -30
  877. synth_ai-0.2.9.dev0.dist-info/METADATA +0 -131
  878. synth_ai-0.2.9.dev0.dist-info/RECORD +0 -444
  879. {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
  880. {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
  881. {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
  882. {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
  883. {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
  884. {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
  885. {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
  886. {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
  887. /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
  888. /synth_ai/{learning/filtering.py → py.typed} +0 -0
  889. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
  890. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,876 @@
1
+ """Task App configuration for the GRPO Crafter example."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import os
7
+ import sys
8
+ from urllib.parse import parse_qs, urlparse
9
+ from collections.abc import Iterable, Sequence
10
+ from contextlib import suppress
11
+ from dataclasses import dataclass
12
+ from datetime import UTC, datetime
13
+ from pathlib import Path
14
+ from typing import Any
15
+
16
+ from fastapi import HTTPException
17
+ from pydantic import BaseModel
18
+
19
+ from pydantic import BaseModel
20
+
21
+ from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
22
+ from synth_ai.task.contracts import RolloutMetrics, RolloutMode, RolloutRequest, RolloutResponse, TaskInfo
23
+ from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
24
+ from synth_ai.task.json import to_jsonable # noqa: F401 (imported for side-effect compatibility)
25
+ from synth_ai.task.rubrics import load_rubric
26
+ from synth_ai.task.server import ProxyConfig, RubricBundle, TaskAppConfig
27
+ from synth_ai.task.tracing_utils import (
28
+ build_tracer_factory,
29
+ resolve_sft_output_dir,
30
+ resolve_tracing_db_url,
31
+ tracing_env_enabled,
32
+ )
33
+ from synth_ai.tracing_v3.session_tracer import SessionTracer
34
+
35
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Ten consecutive ("agent", "env") pairs, i.e. 20 entries in total.
DEFAULT_ALIAS_OPS: list[str] = [op for _ in range(10) for op in ("agent", "env")]

# Default step-reward settings (stepwise rewards enabled in "decision_stepwise" mode).
DEFAULT_ALIAS_STEP_REWARDS: dict[str, Any] = dict(
    enabled=True,
    mode="decision_stepwise",
    indicator_lambda=1.0,
    step_beta=0.0,
)

# Absolute, symlink-resolved path of this source file; anchor for directory discovery.
_HERE = Path(__file__).resolve()
46
+
47
+
48
def _resolve_repo_root() -> Path:
    """Find the Synth AI repo root on a best-effort basis.

    Locations are tried in this order: the ``SYNTH_AI_REPO_ROOT`` environment
    variable (when set and non-empty), ``/opt/synth_ai_repo``, then this
    file's directory and each of its ancestors. The first location that
    exists and contains ``pyproject.toml``, ``uv.lock`` or a ``synth_ai``
    directory wins.

    Returns:
        The resolved root path. If no location qualifies, the fourth ancestor
        of this file is returned, or the file's own directory when the path is
        too shallow to have one.
    """
    search_order: list[Path] = []
    override = os.getenv("SYNTH_AI_REPO_ROOT")
    if override:
        search_order.append(Path(override).expanduser())
    search_order.append(Path("/opt/synth_ai_repo"))
    search_order.append(_HERE.parent)
    search_order.extend(_HERE.parents)

    for location in search_order:
        try:
            root = location.resolve()
        except Exception:
            # A location that cannot be resolved is simply skipped.
            continue
        if not root.exists():
            continue
        has_project_file = (root / "pyproject.toml").exists() or (root / "uv.lock").exists()
        if has_project_file or (root / "synth_ai").is_dir():
            return root

    ancestors = _HERE.parents
    return ancestors[3] if len(ancestors) > 3 else _HERE.parent
74
+
75
+
76
def _resolve_task_app_root(repo_root: Path) -> Path:
    """Find the ``task_app`` directory, even if this module was copied elsewhere.

    Resolution order:
      1. ``<repo_root>/examples/warming_up_to_rl/task_app`` when it is a directory.
      2. This file's directory, then each ancestor, when it contains a
         ``synth_envs_hosted`` directory.
      3. ``/opt/synth_ai_repo/examples/warming_up_to_rl/task_app`` when it is
         a directory.
      4. This file's directory as the last resort.

    Args:
        repo_root: Repository root used to build the preferred location.

    Returns:
        The resolved task-app directory.
    """
    hosted_dirname = "synth_envs_hosted"

    in_repo = (repo_root / "examples" / "warming_up_to_rl" / "task_app").resolve()
    if in_repo.is_dir():
        return in_repo

    module_dir = _HERE.parent.resolve()
    if (module_dir / hosted_dirname).is_dir():
        return module_dir

    # Walk upwards lazily so ancestors are only resolved until the first hit.
    containing = next(
        (
            directory
            for directory in (ancestor.resolve() for ancestor in _HERE.parents)
            if (directory / hosted_dirname).is_dir()
        ),
        None,
    )
    if containing is not None:
        return containing

    modal_mount = Path("/opt/synth_ai_repo/examples/warming_up_to_rl/task_app")
    return modal_mount.resolve() if modal_mount.is_dir() else module_dir
97
+
98
+
99
# Resolve the key directories once at import time.
REPO_ROOT = _resolve_repo_root()
TASK_APP_ROOT = _resolve_task_app_root(REPO_ROOT)
SYNTH_ENVS_HOSTED_ROOT = (TASK_APP_ROOT / "synth_envs_hosted").resolve()

EXAMPLES_ROOT = (REPO_ROOT / "examples").resolve()

# Put each existing directory at the front of sys.path (skipping ones already listed).
for path in (REPO_ROOT, TASK_APP_ROOT, SYNTH_ENVS_HOSTED_ROOT, EXAMPLES_ROOT):
    try:
        resolved = path.resolve()
    except Exception:
        resolved = path
    if not resolved.exists():
        continue
    path_str = str(resolved)
    if path_str not in sys.path:
        sys.path.insert(0, path_str)

# Fallback: explicitly add Modal mount path for 'examples' if REPO_ROOT detection fails
with suppress(Exception):
    _hard_examples = Path("/opt/synth_ai_repo/examples")
    if _hard_examples.exists():
        _hard_examples_str = str(_hard_examples.resolve())
        if _hard_examples_str not in sys.path:
            sys.path.insert(0, _hard_examples_str)
124
+
125
# Prefer the hosted helpers; if importing them fails for any reason, define
# minimal local stand-ins with the same names.
try:
    from .synth_envs_hosted.utils import (
        ensure_chat_completions_url,
        extract_trace_correlation_id,
    )
except Exception:  # pragma: no cover - fallback when optional deps missing

    def ensure_chat_completions_url(raw_url, mode=None):
        """Fallback: hand the URL back untouched (``mode`` is ignored)."""
        return raw_url

    def extract_trace_correlation_id(_raw_url, mode=None):
        """Fallback: read a correlation id from the URL's query string.

        Checks the ``cid``, ``trace`` and ``trace_correlation_id`` parameters
        in that order and returns the first non-blank value, stripped of
        surrounding whitespace. Returns ``None`` for non-string input or when
        no such value is present. ``mode`` is ignored.
        """
        if not isinstance(_raw_url, str):
            return None
        params = parse_qs(urlparse(_raw_url).query or "")
        matches = (
            value.strip()
            for key in ("cid", "trace", "trace_correlation_id")
            for value in (params.get(key) or [])
            if isinstance(value, str) and value.strip()
        )
        return next(matches, None)
145
+
146
# True when the synth_envs_hosted package imported successfully; flipped to
# False below when that package is the one that is missing.
HAS_HOSTED = True
try:
    import crafter  # type: ignore
    import crafter.constants as crafter_constants  # type: ignore
    from synth_ai.environments.examples.crafter_classic.taskset import TRAIT_BOUNDS
    from synth_envs_hosted.branching import router as branching_router  # type: ignore
    from synth_envs_hosted.environment_routes import router as environment_router  # type: ignore
    from synth_envs_hosted.hosted_app import TaskApp as HostedTaskApp  # type: ignore
    from synth_envs_hosted.policy_routes import router as policy_router  # type: ignore
    from synth_envs_hosted.rollout import (  # type: ignore
        RolloutEnvSpec as LegacyRolloutEnvSpec,
        RolloutPolicySpec as LegacyRolloutPolicySpec,
        RolloutRecordConfig as LegacyRolloutRecordConfig,
        RolloutRequest as LegacyRolloutRequest,
        RolloutResponse as LegacyRolloutResponse,
        RolloutSafetyConfig as LegacyRolloutSafetyConfig,
        execute_rollout as legacy_execute_rollout,
    )
except Exception as exc:  # pragma: no cover - import-time validation
    # Provide a more actionable error with the missing module and fix hints
    missing_mod = None
    if isinstance(exc, ModuleNotFoundError):
        # Prefer the structured ``name`` attribute and only fall back to
        # parsing the quoted module out of the message. The previous
        # single-expression form parsed as ``(a or b) if cond else None`` and
        # discarded ``exc.name`` whenever the message carried no quote.
        missing_mod = getattr(exc, "name", None)
        if not missing_mod and "'" in str(exc):
            missing_mod = str(exc).split("'")[1]
    fix_hint = None
    if missing_mod:
        # Import name -> pip distribution name, where the two may differ.
        mapping = {
            "dotenv": "python-dotenv",
            "crafter": "crafter",
            "httpx": "httpx",
            "aiohttp": "aiohttp",
            "fastapi": "fastapi",
            "uvicorn": "uvicorn",
            "sqlalchemy": "sqlalchemy",
            "aiosqlite": "aiosqlite",
            "greenlet": "greenlet",
        }
        pkg = mapping.get(missing_mod, missing_mod)
        fix_hint = (
            f"Missing Python module '{missing_mod}'. Install the package '{pkg}'.\n"
            f"For Modal: add '{pkg}' to ModalDeploymentConfig.pip_packages in synth_ai/task/apps/grpo_crafter.py.\n"
            f"Locally: pip install {pkg}"
        )
    # Allow running without synth_envs_hosted; gate hosted features off
    if missing_mod == "synth_envs_hosted":
        HAS_HOSTED = False
    else:
        detailed = (
            "grpo_crafter task app requires example dependencies and runtime libs.\n"
            + (fix_hint + "\n" if fix_hint else "")
            + f"Original error: {exc}"
        )
        raise RuntimeError(detailed) from exc
212
+
213
+
214
# Crafting prerequisites as a newline-separated text block: one header line
# followed by one bullet per craftable tool (no trailing newline).
CRAFTING_RULES_SYSTEM_HINT = "\n".join(
    (
        "Crafter crafting rules (from the paper):",
        "- Make Wood Pickaxe: Nearby a table; have wood in inventory.",
        "- Make Stone Pickaxe: Nearby a table; have wood and stone in inventory.",
        "- Make Iron Pickaxe: Nearby a table; furnace exists; have wood, coal, and iron in inventory.",
        "- Make Wood Sword: Nearby a table; have wood in inventory.",
        "- Make Stone Sword: Nearby a table; have wood and stone in inventory.",
        "- Make Iron Sword: Nearby a table; furnace exists; have wood, coal, and iron in inventory.",
    )
)
223
+
224
+
225
# Dataset descriptor for the procedural Crafter seeds; it declares a single
# "train" split, which is also the default split.
_DATASET_SPLITS = ["train"]
DATASET_SPEC = TaskDatasetSpec(
    id="crafter_classic_procedural",
    name="Crafter Classic Procedural Seeds",
    version="1.0.0",
    splits=_DATASET_SPLITS,
    default_split=_DATASET_SPLITS[0],
    description="Procedural Crafter Classic seeds with reproducible world traits.",
)
233
+
234
+
235
@dataclass
class CrafterDataset:
    """Seed-indexed view over procedurally generated Crafter worlds.

    Settings are read from the environment when the instance is created:
    ``CRAFTER_DEFAULT_SEED`` (42), ``CRAFTER_MAX_SEED`` (2**31 - 1),
    ``CRAFTER_AREA`` ("64,64", comma-separated integers) and
    ``CRAFTER_EPISODE_LENGTH`` (10000). Seed summaries are memoised per
    instance.
    """

    spec: TaskDatasetSpec

    def __post_init__(self) -> None:
        """Load the environment-driven settings and create an empty summary cache."""
        self.default_seed = int(env_value("CRAFTER_DEFAULT_SEED", 42))
        self.seed_min = 0
        self.seed_max = int(env_value("CRAFTER_MAX_SEED", 2**31 - 1))
        raw_area = str(env_value("CRAFTER_AREA", "64,64"))
        self.area = tuple(map(int, raw_area.split(",")))
        self.length = int(env_value("CRAFTER_EPISODE_LENGTH", 10000))
        self._cache: dict[int, dict[str, Any]] = {}

    def config_for_seed(self, seed: int) -> dict[str, Any]:
        """Return the ``seed`` / ``area`` / ``length`` configuration for ``seed``."""
        return dict(seed=int(seed), area=list(self.area), length=self.length)

    def describe_seed(self, seed: int) -> dict[str, Any]:
        """Summarise the world generated for ``seed``.

        A ``crafter.Env`` is created and reset, nearby-object traits are
        counted, and the player's inventory and position are captured. The
        environment's ``close`` method, when it has a callable one, is always
        invoked afterwards. The summary is cached, so repeated calls for the
        same seed return the same dict object.
        """
        seed = int(seed)
        cached = self._cache.get(seed)
        if cached is not None:
            return cached

        env = crafter.Env(area=self.area, length=self.length, seed=seed)
        try:
            env.reset()
            traits = _compute_world_traits(env)
            player = getattr(env, "_player", None)
            if player:
                inventory = dict(getattr(player, "inventory", {}))
            else:
                inventory = {}
            position = getattr(player, "pos", None)
        finally:
            closer = getattr(env, "close", None)
            if callable(closer):
                closer()

        player_position = None if position is None else list(position)
        summary = {
            "seed": seed,
            "difficulty": self._difficulty(traits),
            "traits": traits,
            "inventory": inventory,
            "player_position": player_position,
            "config": self.config_for_seed(seed),
        }
        self._cache[seed] = summary
        return summary

    def _difficulty(self, traits: dict[str, int]) -> str:
        """Return the first ``TRAIT_BOUNDS`` label whose limits ``traits`` meet.

        A label matches when the tree count is at least its ``min_trees`` and
        the hostile count is at most its ``max_hostiles`` (each limit defaults
        to 0 when absent). ``"custom"`` is returned when nothing matches.
        """
        for label, limits in TRAIT_BOUNDS.items():
            enough_trees = traits.get("trees", 0) >= limits.get("min_trees", 0)
            if enough_trees and traits.get("hostiles", 0) <= limits.get("max_hostiles", 0):
                return label
        return "custom"

    @property
    def seed_range(self) -> list[int]:
        """Two-element list ``[seed_min, seed_max]``."""
        return [self.seed_min, self.seed_max]
292
+
293
+
294
def _compute_world_traits(env: crafter.Env, radius: int = 10) -> dict[str, int]:
    """Count trees, cows and hostile mobs close to the player.

    An object counts as close when the Manhattan distance between its ``pos``
    and the player's ``pos`` is at most ``radius``. Objects whose position
    cannot be compared are skipped. All counts are zero when the env has no
    ``_player``.
    """
    # Local copy to avoid import-time issues; mirrors synth_ai.environments.examples.crafter_classic.taskset.world_traits
    import numpy as _np  # type: ignore
    from crafter import objects as _objects  # type: ignore

    tally = {"trees": 0, "cows": 0, "hostiles": 0}
    player = getattr(env, "_player", None)
    if player is None:
        return tally

    origin = _np.array(getattr(player, "pos", [0, 0]))
    world = getattr(env, "_world", None)
    world_objects = [] if world is None else getattr(world, "_objects", [])

    for entity in world_objects:
        if entity is None or entity is player:
            continue
        try:
            if _np.abs(entity.pos - origin).sum() > radius:
                continue
        except Exception:
            # Position missing or not comparable: ignore this object.
            continue

        if isinstance(entity, _objects.Plant) and getattr(entity, "kind", "") == "tree":
            tally["trees"] += 1
        elif isinstance(entity, _objects.Cow):
            tally["cows"] += 1
        elif isinstance(entity, (_objects.Zombie, _objects.Skeleton)):
            tally["hostiles"] += 1
    return tally
321
+
322
+
323
def env_value(key: str, default: Any) -> Any:
    """Return environment variable ``key``, or ``default`` when it is unset.

    ``default`` is handed back unchanged, whatever its type.
    """
    return os.environ.get(key, default)
325
+
326
+
327
def build_dataset() -> tuple[TaskDatasetRegistry, CrafterDataset]:
    """Create the Crafter dataset plus a registry that serves it.

    The factory registered for ``DATASET_SPEC`` ignores its argument and
    returns the same ``CrafterDataset`` instance that is handed to the caller.
    """
    task_registry = TaskDatasetRegistry()
    crafter_dataset = CrafterDataset(DATASET_SPEC)

    def _shared_dataset(_spec):
        return crafter_dataset

    task_registry.register(DATASET_SPEC, _shared_dataset, cache=True)
    return task_registry, crafter_dataset
332
+
333
+
334
def _base_task_info(dataset: CrafterDataset) -> TaskInfo:
    """Assemble the static ``TaskInfo`` advertised by the Crafter task app.

    The dataset section is ``DATASET_SPEC`` dumped to a dict and extended with
    the seed range and default seed of ``dataset``.
    """
    task_section = {"id": "crafter_classic", "name": "Crafter Classic", "version": "1.0.0"}

    action_section = {
        "type": "discrete",
        "size": len(crafter_constants.actions),
        "actions": list(crafter_constants.actions),
    }

    observation_section = {
        "summary": "RGB frame plus inventory, achievements, and semantic map patches.",
        "keys": ["image", "inventory", "achievements", "semantic_map_patch7"],
        "image_shape": [64, 64, 3],
    }

    dataset_section = dict(DATASET_SPEC.model_dump())
    dataset_section["seed_range"] = dataset.seed_range
    dataset_section["default_seed"] = dataset.default_seed

    rubric_section = {
        "version": "1",
        "criteria_count": 2,
        "source": "inline",
        "aggregation": "weighted_sum",
    }

    inference_section = {
        "supports_proxy": True,
        "endpoints": {
            "openai": "/proxy/v1/chat/completions",
            "groq": "/proxy/groq/v1/chat/completions",
        },
        "tool": {"name": "interact", "parallel_tool_calls": False},
    }

    capability_section = {
        "supports_rollout": True,
        "supports_env_lifecycle": True,
        "requires_api_key_header": True,
    }

    return TaskInfo(
        task=task_section,
        environment="crafter",
        action_space=action_section,
        observation=observation_section,
        dataset=dataset_section,
        rubric=rubric_section,
        inference=inference_section,
        capabilities=capability_section,
        limits={"max_ops": 100000, "max_time_s": 3600},
    )
374
+
375
+
376
# Episode-level rubric: two equally weighted criteria.
OUTCOME_RUBRIC = load_rubric(
    {
        "version": "1",
        "goal_text": "Reward unlocking Crafter achievements and survival.",
        "aggregation": "weighted_sum",
        "criteria": [
            {"id": criterion_id, "description": criterion_text, "weight": 1.0}
            for criterion_id, criterion_text in (
                ("achievements", "Unlock achievements or crafting milestones."),
                ("survival", "Maintain health, food, and drink levels."),
            )
        ],
    }
)

# Step-level rubric: a single criterion.
EVENTS_RUBRIC = load_rubric(
    {
        "version": "1",
        "goal_text": "Encourage purposeful step-wise exploration and crafting.",
        "aggregation": "weighted_sum",
        "criteria": [
            dict(
                id="progress_steps",
                description="Actions progress quests, crafting, or exploration.",
                weight=1.0,
            ),
        ],
    }
)
410
+
411
+
412
def describe_taskset(dataset: CrafterDataset) -> dict[str, Any]:
    """Describe the task set: ``DATASET_SPEC`` fields plus seed and world settings.

    The result is a new dict on every call.
    """
    description = dict(DATASET_SPEC.model_dump())
    description["seed_range"] = dataset.seed_range
    description["default_seed"] = dataset.default_seed
    description["config"] = {"area": list(dataset.area), "length": dataset.length}
    return description
422
+
423
+
424
def provide_task_instances(
    dataset: CrafterDataset, base_info: TaskInfo, seeds: Sequence[int]
) -> Iterable[TaskInfo]:
    """Build one ``TaskInfo`` per seed, specialising ``base_info``.

    Each instance copies ``base_info`` and overlays, from
    ``dataset.describe_seed``, the seed, traits, inventory and player position
    onto the observation section, and the seed, difficulty and config onto the
    dataset section. Returns a list in the same order as ``seeds``.
    """
    # The base observation may be a model (has ``model_dump``) or a plain
    # dict; anything else yields an empty template.
    raw_observation = getattr(base_info, "observation", None)
    if hasattr(raw_observation, "model_dump"):
        observation_template = raw_observation.model_dump()
    elif isinstance(raw_observation, dict):
        observation_template = dict(raw_observation)
    else:
        observation_template = {}

    def _instance_for(seed_value: int) -> TaskInfo:
        summary = dataset.describe_seed(seed_value)

        observation_section = dict(observation_template)
        observation_section["seed"] = seed_value
        observation_section["traits"] = summary["traits"]
        observation_section["inventory"] = summary["inventory"]
        observation_section["player_position"] = summary["player_position"]

        dataset_section = dict(base_info.dataset.model_dump())
        dataset_section["seed"] = seed_value
        dataset_section["difficulty"] = summary["difficulty"]
        dataset_section["config"] = summary["config"]

        return TaskInfo(
            task=base_info.task,
            environment=base_info.environment,
            action_space=base_info.action_space,
            observation=observation_section,
            dataset=dataset_section,
            rubric=base_info.rubric,
            inference=base_info.inference,
            capabilities=base_info.capabilities,
            limits=base_info.limits,
        )

    return [_instance_for(seed_value) for seed_value in seeds]
462
+
463
+
464
def _normalise_op(op_value: Any, index: int) -> str:
    """Map a rollout op to its canonical name, ``"agent"`` or ``"env"``.

    Args:
        op_value: Either the op name as a string, or a dict carrying the name
            under ``"type"`` (preferred) or ``"op"``.
        index: Position of the op in the request; used only in error messages.

    Returns:
        ``"agent"`` for policy/agent/model, ``"env"`` for env/environment/step
        (case-insensitive, surrounding whitespace ignored).

    Raises:
        ValueError: If no op name can be found or the name is not recognised.
    """
    if isinstance(op_value, dict):
        candidate = op_value.get("type") or op_value.get("op")
    elif isinstance(op_value, str):
        candidate = op_value
    else:
        candidate = None

    if not candidate:
        raise ValueError(f"Missing op type at index {index}")

    canonical_names = {
        "policy": "agent",
        "agent": "agent",
        "model": "agent",
        "env": "env",
        "environment": "env",
        "step": "env",
    }
    canonical = canonical_names.get(str(candidate).strip().lower())
    if canonical is None:
        raise ValueError(f"Unsupported op type '{candidate}' at index {index}")
    return canonical
479
+
480
+
481
def _coerce_math_to_crafter(request: RolloutRequest) -> RolloutRequest:
    """Map legacy math env/policy names to crafter and enrich rollout defaults.

    A name or id is treated as a math alias when, ignoring case and
    surrounding whitespace, it starts with ``"math"``. Requests without any
    such alias are returned unchanged. Otherwise aliased names are rewritten,
    aliased ids are cleared, missing env/policy config keys are filled with
    defaults, and an op list that is empty or shorter than
    ``DEFAULT_ALIAS_OPS`` is replaced by a copy of it.
    """

    def _is_math_alias(name: str | None) -> bool:
        return bool(name) and str(name).strip().lower().startswith("math")

    source_env = request.env
    source_policy = request.policy

    env_updates: dict[str, Any] = {}
    if _is_math_alias(source_env.env_name):
        env_updates["env_name"] = "crafter"
    if source_env.env_id and _is_math_alias(source_env.env_id):
        env_updates["env_id"] = None

    policy_updates: dict[str, Any] = {}
    if _is_math_alias(source_policy.policy_name):
        policy_updates["policy_name"] = "crafter-react"
    if source_policy.policy_id and _is_math_alias(source_policy.policy_id):
        policy_updates["policy_id"] = None

    # No alias matched anywhere: leave the request exactly as it came in.
    if not env_updates and not policy_updates:
        return request

    updated_env = source_env.model_copy(update=env_updates) if env_updates else source_env
    updated_policy = (
        source_policy.model_copy(update=policy_updates) if policy_updates else source_policy
    )

    # Fill in env defaults without overriding anything the caller supplied.
    env_cfg = dict(updated_env.config or {})
    for key, fallback in (
        ("difficulty", "normal"),
        ("step_rewards", dict(DEFAULT_ALIAS_STEP_REWARDS)),
        ("env_params", {"max_steps_per_episode": 200}),
    ):
        env_cfg.setdefault(key, fallback)
    updated_env = updated_env.model_copy(update={"config": env_cfg})

    # Same for the policy config.
    policy_cfg = dict(updated_policy.config or {})
    for key, fallback in (
        ("max_llm_calls", 10),
        ("max_completion_tokens", 1024),
        ("temperature", 0.2),
        ("step_rewards", dict(DEFAULT_ALIAS_STEP_REWARDS)),
    ):
        policy_cfg.setdefault(key, fallback)
    updated_policy = updated_policy.model_copy(update={"config": policy_cfg})

    requested_ops = request.ops
    if requested_ops and len(requested_ops) >= len(DEFAULT_ALIAS_OPS):
        ops_override = requested_ops
    else:
        ops_override = list(DEFAULT_ALIAS_OPS)

    coerced = request.model_copy(
        update={"env": updated_env, "policy": updated_policy, "ops": ops_override}
    )

    # Announce the remap on stdout and in the log; neither may break the rollout.
    with suppress(Exception):
        print(
            "[rollout] remapped math request -> crafter "
            f"(env={request.env.env_name!r}→{coerced.env.env_name!r}, "
            f"policy={request.policy.policy_name!r}→{coerced.policy.policy_name!r})",
            flush=True,
        )
    with suppress(Exception):
        logger.info(
            "ROLLOUT_ALIAS: remapped math env/policy to crafter (env=%s→%s, policy=%s→%s)",
            request.env.env_name,
            coerced.env.env_name,
            request.policy.policy_name,
            coerced.policy.policy_name,
        )

    return coerced
551
+
552
+
553
async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
    """Run a rollout through the legacy hosted executor and normalise its response.

    The request is alias-remapped via ``_coerce_math_to_crafter``, forced to
    return a structured trace, and given default sampling and step limits
    before being converted to a ``LegacyRolloutRequest``. The legacy response
    is then enriched with ``trace_correlation_id``, ``pipeline_metadata`` and
    per-trajectory / per-step ``inference_url`` values and validated.

    Args:
        request: Incoming rollout request.
        fastapi_request: The FastAPI request; it is forwarded to the legacy
            executor, and ``app.state.session_tracer_factory`` is consulted to
            backfill a trace the legacy response did not include.

    Returns:
        The validated ``RolloutResponse``. When ``HAS_HOSTED`` is false an
        empty no-op response is returned instead.

    Raises:
        AssertionError: In RL mode, when no trace correlation id can be
            extracted from the inference URL.
        ValueError: From ``_normalise_op`` for a missing or unsupported op.
        HTTPException: Status 500 when no trace is available after backfill.
    """
    # If hosted env service code is not bundled, return a no-op rollout response compatible with contracts
    if not HAS_HOSTED:
        return RolloutResponse(
            run_id=request.run_id,
            trajectories=[],
            branches={},
            metrics=RolloutMetrics(
                episode_returns=[],
                mean_return=0.0,
                num_steps=0,
                num_episodes=0,
                details={},
            ),
            aborted=False,
            ops_executed=0,
            trace=None,
        )

    request = _coerce_math_to_crafter(request)

    # Always ask for a structured trace, whatever the caller requested.
    record_cfg = request.record.model_copy(
        update={
            "return_trace": True,
            "trace_format": "structured",
        }
    )
    request = request.model_copy(update={"record": record_cfg})

    policy_cfg = dict(request.policy.config or {})
    logger.info(
        "ROLLOUT_EXEC: incoming policy config keys=%s inference_url=%s run_id=%s mode=%s",
        sorted(policy_cfg.keys()),
        policy_cfg.get("inference_url"),
        request.run_id,
        request.mode,
    )
    inferred_url = ensure_chat_completions_url(policy_cfg.get("inference_url"), mode=request.mode)
    if isinstance(inferred_url, str) and inferred_url:
        policy_cfg["inference_url"] = inferred_url
    else:
        logger.warning(
            "ROLLOUT_EXEC: inference_url missing or not normalized run_id=%s raw=%s",
            request.run_id,
            policy_cfg.get("inference_url"),
        )

    trace_correlation_id = extract_trace_correlation_id(policy_cfg.get("inference_url"), mode=request.mode)
    if request.mode == RolloutMode.RL and not trace_correlation_id:
        # Raised explicitly (same exception type as the former ``assert``) so the
        # check is still enforced when Python runs with ``-O``.
        raise AssertionError(
            f"FATAL: trace_correlation_id extraction failed for run_id={request.run_id}. "
            f"policy_cfg_keys={sorted(policy_cfg.keys())} inference_url={policy_cfg.get('inference_url')}"
        )
    if trace_correlation_id:
        policy_cfg["trace_correlation_id"] = trace_correlation_id

    # Best-effort parse: any unusable value falls back to 10 calls.
    try:
        max_llm_calls = int(policy_cfg.get("max_llm_calls") or 10)
    except Exception:
        max_llm_calls = 10
    policy_cfg.setdefault("max_llm_calls", max_llm_calls)
    policy_cfg.setdefault("max_tokens", 512)
    policy_cfg.setdefault("max_completion_tokens", 512)
    policy_cfg.setdefault("temperature", 0.2)
    policy_cfg.setdefault("top_p", 0.95)

    env_cfg = dict(request.env.config or {})
    env_params = dict(env_cfg.get("env_params") or {})
    try:
        max_steps_episode = int(env_params.get("max_steps_per_episode") or max_llm_calls)
    except Exception:
        max_steps_episode = max_llm_calls
    # The episode must allow at least as many steps as there are LLM calls.
    desired_steps = max(max_llm_calls, max_steps_episode)
    env_params["max_steps_per_episode"] = int(desired_steps)
    env_cfg["env_params"] = env_params

    updated_policy = request.policy.model_copy(update={"config": policy_cfg})
    updated_env = request.env.model_copy(update={"config": env_cfg})
    request = request.model_copy(update={"policy": updated_policy, "env": updated_env})

    converted_ops: list[str] = [_normalise_op(op, idx) for idx, op in enumerate(request.ops)]
    # Cap the op list at two ops per LLM call; no cap when max_llm_calls <= 0.
    max_ops_allowed = max_llm_calls * 2 if max_llm_calls > 0 else len(converted_ops)
    if max_ops_allowed and len(converted_ops) > max_ops_allowed:
        converted_ops = converted_ops[:max_ops_allowed]
    legacy_request = LegacyRolloutRequest(
        run_id=request.run_id,
        env=LegacyRolloutEnvSpec(
            env_id=request.env.env_id,
            env_name=request.env.env_name,
            config=env_cfg,
            seed=request.env.seed,
        ),
        policy=LegacyRolloutPolicySpec(
            policy_id=request.policy.policy_id,
            policy_name=request.policy.policy_name,
            config=policy_cfg,
        ),
        ops=converted_ops,
        record=LegacyRolloutRecordConfig(**request.record.model_dump()),
        on_done=request.on_done,
        branch=None,
        safety=LegacyRolloutSafetyConfig(**request.safety.model_dump()),
        training_session_id=request.training_session_id,
        synth_base_url=request.synth_base_url,
        mode=request.mode,
    )

    legacy_response: LegacyRolloutResponse = await legacy_execute_rollout(
        legacy_request, fastapi_request
    )
    data = legacy_response.model_dump()
    logger.debug(
        "ROLLOUT_EXEC: legacy response keys=%s has_trace=%s",
        sorted(data.keys()),
        bool(data.get("trace")),
    )
    metrics = data.get("metrics", {}) or {}
    metrics.setdefault("outcome_score", None)
    metrics.setdefault("events_score", None)
    metrics.setdefault("details", {})
    data["metrics"] = metrics

    # Backfill a missing trace: first from the response object itself, then
    # from the session tracer's database (keyed by run_id).
    if data.get("trace") is None:
        legacy_trace = getattr(legacy_response, "trace", None)
        if legacy_trace is not None:
            data["trace"] = legacy_trace
        else:
            tracer_factory = getattr(fastapi_request.app.state, "session_tracer_factory", None)
            if callable(tracer_factory):
                tracer = tracer_factory()
                logger.debug(
                    "ROLLOUT_EXEC: trace backfill factory=%s", type(tracer)
                )
                if isinstance(tracer, SessionTracer):
                    try:
                        await tracer.initialize()
                        if tracer.db is not None:
                            trace_row = await tracer.db.get_session_trace(request.run_id)
                            if trace_row is not None:
                                data["trace"] = trace_row
                    except Exception as exc:
                        logger.warning("TRACE_BACKFILL_FAIL: %s", exc)
                    finally:
                        with suppress(Exception):
                            await tracer.close()

    final_cid = trace_correlation_id or f"trace_{request.run_id}"
    data["trace_correlation_id"] = final_cid

    existing_meta = data.get("pipeline_metadata")
    if not isinstance(existing_meta, dict):
        existing_meta = {}
    existing_meta.setdefault("trace_correlation_id", final_cid)
    if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
        existing_meta.setdefault("inference_url", policy_cfg["inference_url"])
    data["pipeline_metadata"] = existing_meta

    # Propagate inference_url into each legacy trajectory entry for downstream tooling.
    inferred_url = policy_cfg.get("inference_url")
    # Normalize the url before propagating into trajectories
    try:
        from .synth_envs_hosted.utils import (
            ensure_chat_completions_url as _ensure_cc,
            force_normalize_chat_completions_url as _force_cc,
        )
        if isinstance(inferred_url, str) and inferred_url:
            inferred_url = _force_cc(inferred_url)
            inferred_url = _ensure_cc(inferred_url, mode=request.mode)
    except Exception as exc:
        # Best effort: keep going with whatever URL we have, but leave a trail.
        logger.debug("ROLLOUT_EXEC: inference_url normalization skipped: %s", exc)

    if "trajectories" in data:
        normalized_trajs: list[dict[str, Any]] = []
        for traj in data.get("trajectories", []):
            if isinstance(traj, BaseModel):
                traj_dict = traj.model_dump()
            elif isinstance(traj, dict):
                traj_dict = dict(traj)
            else:
                continue
            traj_dict.setdefault("trace_correlation_id", final_cid)
            if isinstance(inferred_url, str) and inferred_url and not traj_dict.get("inference_url"):
                traj_dict["inference_url"] = inferred_url

            # Inject nested info.meta.inference_url for each step (required by RL trainer)
            try:
                steps = traj_dict.get("steps", [])
                if isinstance(steps, list):
                    for step in steps:
                        if not isinstance(step, dict):
                            continue
                        info = step.get("info")
                        if not isinstance(info, dict):
                            info = {}
                        meta = info.get("meta")
                        if not isinstance(meta, dict):
                            meta = {}
                        if isinstance(inferred_url, str) and inferred_url and not meta.get("inference_url"):
                            meta["inference_url"] = inferred_url
                        info["meta"] = meta
                        step["info"] = info
            except Exception as exc:
                # Best effort: the trajectory is still returned without step metadata.
                logger.debug("ROLLOUT_EXEC: step inference_url injection skipped: %s", exc)

            normalized_trajs.append(traj_dict)
        if normalized_trajs:
            data["trajectories"] = normalized_trajs

    # A trace is mandatory (return_trace is forced on above); fail loudly
    # rather than return a response without one.
    if data.get("trace") is None:
        raise HTTPException(
            status_code=500, detail="trace_payload_missing: task app did not emit a SessionTrace"
        )

    return RolloutResponse.model_validate(data)
780
+
781
+
782
def build_config() -> TaskAppConfig:
    """Construct the ``TaskAppConfig`` for the GRPO Crafter task app.

    Builds the dataset and base task info, resolves tracing and SFT settings
    into the app state, prints the tracing status (and the SFT output
    directory when one is set), and includes the hosted routers only when
    ``HAS_HOSTED`` is true.
    """
    registry, dataset = build_dataset()
    base_info = _base_task_info(dataset)

    hosted_task_app = HostedTaskApp() if HAS_HOSTED else None

    tracing_enabled = tracing_env_enabled()
    tracing_db_url = resolve_tracing_db_url()
    tracer_factory = build_tracer_factory(
        SessionTracer, enabled=tracing_enabled, db_url=tracing_db_url
    )
    sft_output_dir = resolve_sft_output_dir()

    app_state: dict[str, Any] = dict(
        task_app=hosted_task_app,
        allowed_environments=["crafter"],
        tracing_enabled=tracing_enabled,
    )
    # Optional entries are only added when they carry a value.
    if tracer_factory is not None:
        app_state["session_tracer_factory"] = tracer_factory
    if sft_output_dir:
        app_state["sft_output_dir"] = sft_output_dir

    print(
        f"[task:tracing] enabled (db={tracing_db_url or 'default'})"
        if tracing_enabled
        else "[task:tracing] disabled",
        flush=True,
    )
    if sft_output_dir:
        print(f"[task:sft] writing JSONL to {sft_output_dir}", flush=True)

    routers: tuple = ()
    if HAS_HOSTED:
        routers = (environment_router, policy_router, branching_router)

    return TaskAppConfig(
        app_id="grpo-crafter",
        name="GRPO Crafter Task App",
        description="Crafter Classic environment with GRPO task endpoints and LLM proxies.",
        base_task_info=base_info,
        # Both callbacks close over the dataset built above.
        describe_taskset=lambda: describe_taskset(dataset),
        provide_task_instances=lambda seeds: provide_task_instances(dataset, base_info, seeds),
        rollout=rollout_executor,
        dataset_registry=registry,
        rubrics=RubricBundle(outcome=OUTCOME_RUBRIC, events=EVENTS_RUBRIC),
        proxy=ProxyConfig(
            enable_openai=True, enable_groq=True, system_hint=CRAFTING_RULES_SYSTEM_HINT
        ),
        routers=routers,
        app_state=app_state,
        cors_origins=["*"],
    )
839
+
840
+
841
# Register the Crafter task app at import time. ``build_config`` is passed as
# a factory, so it is not called here.
register_task_app(
    entry=TaskAppEntry(
        app_id="grpo-crafter",
        aliases=("crafter", "crafter-task"),
        description="Crafter Classic task app with rollout + proxy endpoints",
        config_factory=build_config,
        modal=ModalDeploymentConfig(
            app_name="grpo-crafter-task-app",
            python_version="3.11",
            # Container resources
            cpu=4.0,
            memory=16384,
            max_containers=10,
            secret_names=("groq-api-key", "openai-api-key"),
            pip_packages=(
                "fastapi>=0.100.0",
                "uvicorn>=0.23.0",
                "pydantic>=2.0.0",
                "numpy>=1.24.0",
                "aiohttp>=3.8.0",
                "httpx>=0.24.0",
                "python-dotenv>=1.0.1",
                # Tracing/DB runtime deps
                "sqlalchemy>=2.0.42",
                "aiosqlite>=0.21.0",
                "greenlet>=3.2.3",
                "crafter",
            ),
            extra_local_dirs=(
                # Mount repo root so local modules resolve when deployed on Modal
                (str(REPO_ROOT), "/opt/synth_ai_repo"),
                (str(REPO_ROOT / "synth_ai"), "/opt/synth_ai_repo/synth_ai"),
                (str(TASK_APP_ROOT), "/opt/synth_ai_repo/examples/warming_up_to_rl/task_app"),
            ),
        ),
    )
)