synth-ai 0.2.8.dev4__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (889):
  1. examples/README.md +1 -0
  2. examples/__init__.py +16 -0
  3. examples/analyze_semantic_words.sh +17 -0
  4. examples/baseline/banking77_baseline.py +243 -0
  5. examples/baseline/banking77_pipeline_baseline.py +294 -0
  6. examples/baseline/crafter_baseline.py +407 -0
  7. examples/baseline/pokemon_red_baseline.py +326 -0
  8. examples/baseline/simple_baseline.py +56 -0
  9. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  10. examples/blog_posts/gepa/README.md +355 -0
  11. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  12. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
  13. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
  14. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
  15. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
  16. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
  17. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
  18. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
  19. examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
  20. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
  21. examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
  22. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
  23. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
  24. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
  25. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
  26. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
  27. examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
  28. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  29. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  30. examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
  31. examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
  32. examples/blog_posts/gepa/task_apps.py +105 -0
  33. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  34. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  35. examples/blog_posts/mipro/README.md +415 -0
  36. examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
  37. examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
  38. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
  39. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
  40. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
  41. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
  42. examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
  43. examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
  44. examples/blog_posts/mipro/multi_step.md +79 -0
  45. examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
  46. examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
  47. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
  48. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
  49. examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
  50. examples/blog_posts/pokemon_vl/README.md +98 -0
  51. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  52. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  53. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  54. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  55. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  56. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  57. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  58. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  59. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  60. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  61. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  62. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  63. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  64. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  65. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  66. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  67. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  68. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  69. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  70. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  71. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  72. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  73. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  74. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  75. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  76. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  77. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  78. examples/crafter_debug_render.py +186 -0
  79. examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
  80. examples/gepa/banking77_pipeline_gepa.toml +96 -0
  81. examples/gepa/multi_stage_gepa_example.toml +84 -0
  82. examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
  83. examples/multi_step/SFT_README.md +147 -0
  84. examples/multi_step/configs/README_verilog_rl.md +77 -0
  85. examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
  86. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
  87. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  88. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  89. examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
  90. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
  91. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
  92. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
  93. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  94. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  95. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  96. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  97. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  98. examples/multi_step/configs/verilog_rl_lora.toml +147 -0
  99. examples/multi_step/convert_traces_to_sft.py +84 -0
  100. examples/multi_step/crafter_rl_lora.md +70 -0
  101. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  102. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  103. examples/multi_step/readme.md +48 -0
  104. examples/multi_step/run_sft_qwen30b.sh +45 -0
  105. examples/multi_step/sse_metrics_streaming_notes.md +357 -0
  106. examples/multi_step/task_app_config_notes.md +494 -0
  107. examples/multi_step/verilog_rl_lora.md +218 -0
  108. examples/qwen_coder/README.md +102 -0
  109. examples/qwen_coder/_shared.py +113 -0
  110. examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
  111. examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
  112. examples/qwen_coder/configs/coder_lora_small.toml +57 -0
  113. examples/qwen_coder/generate_dataset.py +98 -0
  114. examples/qwen_coder/infer_ft_smoke.py +65 -0
  115. examples/qwen_coder/infer_prod_proxy.py +73 -0
  116. examples/qwen_coder/infer_via_synth.py +87 -0
  117. examples/qwen_coder/scripts/infer_coder.sh +19 -0
  118. examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
  119. examples/qwen_coder/sft_full_17b.py +103 -0
  120. examples/qwen_coder/sft_lora_30b.py +110 -0
  121. examples/qwen_coder/subset_jsonl.py +39 -0
  122. examples/qwen_coder/todos.md +38 -0
  123. examples/qwen_coder/validate_jsonl.py +60 -0
  124. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  125. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  126. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  127. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  128. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  129. examples/qwen_vl/QUICKSTART.md +327 -0
  130. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  131. examples/qwen_vl/README.md +152 -0
  132. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  133. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  134. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  135. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  136. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  137. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  138. examples/qwen_vl/__init__.py +2 -0
  139. examples/qwen_vl/collect_data_via_cli.md +415 -0
  140. examples/qwen_vl/collect_vision_traces.py +368 -0
  141. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  142. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  143. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  144. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  145. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  146. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  147. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  148. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  149. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  150. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  151. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  152. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  153. examples/qwen_vl/run_vision_comparison.sh +61 -0
  154. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  155. examples/qwen_vl/test_image_validation.py +201 -0
  156. examples/qwen_vl/test_sft_vision_data.py +110 -0
  157. examples/rl/README.md +169 -0
  158. examples/rl/configs/eval_base_qwen.toml +17 -0
  159. examples/rl/configs/eval_rl_qwen.toml +13 -0
  160. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  161. examples/rl/configs/rl_from_base_qwen17.toml +80 -0
  162. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  163. examples/rl/download_dataset.py +80 -0
  164. examples/rl/run_eval.py +436 -0
  165. examples/rl/run_rl_and_save.py +111 -0
  166. examples/rl/task_app/README.md +21 -0
  167. examples/rl/task_app/math_single_step.py +990 -0
  168. examples/rl/task_app/math_task_app.py +111 -0
  169. examples/run_crafter_demo.sh +10 -0
  170. examples/sdk_prompt_learning_example.py +55 -0
  171. examples/sft/README.md +139 -0
  172. examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
  173. examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
  174. examples/sft/evaluate.py +117 -0
  175. examples/sft/export_dataset.py +120 -0
  176. examples/sft/generate_traces.py +164 -0
  177. examples/swe/__init__.py +12 -0
  178. examples/swe/task_app/README.md +135 -0
  179. examples/swe/task_app/__init__.py +2 -0
  180. examples/swe/task_app/grpo_swe_mini.py +604 -0
  181. examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
  182. examples/swe/task_app/hosted/README.md +173 -0
  183. examples/swe/task_app/hosted/__init__.py +5 -0
  184. examples/swe/task_app/hosted/branching.py +143 -0
  185. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  186. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  187. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  188. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  189. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  190. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  191. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  192. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  193. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  194. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  195. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
  196. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  197. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  198. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  199. examples/swe/task_app/hosted/hosted_app.py +204 -0
  200. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  201. examples/swe/task_app/hosted/inference/openai_client.py +584 -0
  202. examples/swe/task_app/hosted/main.py +100 -0
  203. examples/swe/task_app/hosted/policy_routes.py +1094 -0
  204. examples/swe/task_app/hosted/registry.py +195 -0
  205. examples/swe/task_app/hosted/rollout.py +1905 -0
  206. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  207. examples/swe/task_app/hosted/storage/volume.py +211 -0
  208. examples/swe/task_app/hosted/test_agents.py +161 -0
  209. examples/swe/task_app/hosted/test_service.py +136 -0
  210. examples/swe/task_app/hosted/utils.py +62 -0
  211. examples/swe/task_app/morph_backend.py +178 -0
  212. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  213. examples/task_apps/TESTING.md +275 -0
  214. examples/task_apps/banking77/__init__.py +6 -0
  215. examples/task_apps/banking77/banking77_task_app.py +912 -0
  216. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  217. examples/task_apps/banking77_pipeline/__init__.py +6 -0
  218. examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
  219. examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
  220. examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
  221. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  222. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
  223. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
  224. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  225. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  226. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  227. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  228. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  229. examples/task_apps/crafter/task_app/README.md +42 -0
  230. examples/task_apps/crafter/task_app/__init__.py +5 -0
  231. examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
  232. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
  233. examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
  234. examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
  235. examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
  236. examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  237. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  238. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  239. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  240. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
  241. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
  242. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
  243. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  244. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  245. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
  246. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  247. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
  248. examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
  249. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
  250. examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
  251. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
  252. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  253. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
  254. examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
  255. examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
  256. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
  257. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  258. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  259. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  260. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  261. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  262. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  263. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  264. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  265. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  266. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  267. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  268. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  269. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  270. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  271. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  272. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  273. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  274. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  275. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  276. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  277. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  278. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  279. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  280. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  281. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  282. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  283. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  284. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  285. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  286. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  287. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  288. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  289. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  290. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  291. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  292. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  293. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  294. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  295. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  296. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  297. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  298. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  299. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  300. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  301. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  302. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  303. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  304. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  305. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  306. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  307. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  308. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  309. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  310. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  311. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  312. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  313. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  314. examples/task_apps/enron/__init__.py +2 -0
  315. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  316. examples/task_apps/enron/filter_sft.toml +5 -0
  317. examples/task_apps/enron/task_app/README.md +14 -0
  318. examples/task_apps/enron/task_app/__init__.py +1 -0
  319. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  320. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  321. examples/task_apps/enron/tests/__init__.py +4 -0
  322. examples/task_apps/enron/tests/conftest.py +115 -0
  323. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  324. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  325. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  326. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  327. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  328. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  329. examples/task_apps/gepa_benchmarks/common.py +260 -0
  330. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  331. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  332. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  333. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  334. examples/task_apps/math/README.md +21 -0
  335. examples/task_apps/math/math_single_step.py +1000 -0
  336. examples/task_apps/math/math_task_app.py +115 -0
  337. examples/task_apps/pokemon_battle/__init__.py +2 -0
  338. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  339. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  340. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  341. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  342. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  343. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  344. examples/task_apps/pokemon_red/README.md +356 -0
  345. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
  346. examples/task_apps/pokemon_red/__init__.py +3 -0
  347. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
  348. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
  349. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  350. examples/task_apps/pokemon_red/task_app.py +1048 -0
  351. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  352. examples/task_apps/sokoban/README.md +306 -0
  353. examples/task_apps/sokoban/__init__.py +3 -0
  354. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  355. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  356. examples/task_apps/sokoban/filter_sft.toml +5 -0
  357. examples/task_apps/sokoban/task_app.py +1058 -0
  358. examples/task_apps/sokoban/tests/__init__.py +4 -0
  359. examples/task_apps/sokoban/tests/conftest.py +113 -0
  360. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  361. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  362. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  363. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  364. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  365. examples/task_apps/verilog/__init__.py +1 -0
  366. examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
  367. examples/task_apps/verilog/filter_sft.toml +5 -0
  368. examples/task_apps/verilog/task_app/README.md +12 -0
  369. examples/task_apps/verilog/task_app/__init__.py +1 -0
  370. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  371. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  372. examples/task_apps/verilog/tests/__init__.py +4 -0
  373. examples/task_apps/verilog/tests/conftest.py +115 -0
  374. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  375. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  376. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  377. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  378. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  379. examples/tunnel_gepa_banking77/README.md +106 -0
  380. examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
  381. examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
  382. examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
  383. examples/vlm/PROPOSAL.md +53 -0
  384. examples/vlm/README.md +68 -0
  385. examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
  386. examples/vlm/crafter_image_only_agent.py +207 -0
  387. examples/vlm/crafter_openai_vlm_agent.py +275 -0
  388. examples/vlm/filter_image_rows.py +63 -0
  389. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  390. examples/warming_up_to_rl/_utils.py +92 -0
  391. examples/warming_up_to_rl/analyze_trace_db.py +422 -0
  392. examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
  393. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
  394. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
  395. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
  396. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
  397. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
  398. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
  399. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
  400. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
  401. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
  402. examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
  403. examples/warming_up_to_rl/export_trace_sft.py +837 -0
  404. examples/warming_up_to_rl/groq_test.py +97 -0
  405. examples/warming_up_to_rl/manage_secrets.py +131 -0
  406. examples/warming_up_to_rl/old/event_rewards.md +234 -0
  407. examples/warming_up_to_rl/old/notes.md +73 -0
  408. examples/warming_up_to_rl/readme.md +110 -0
  409. examples/warming_up_to_rl/run_eval.py +736 -0
  410. examples/warming_up_to_rl/run_fft_and_save.py +380 -0
  411. examples/warming_up_to_rl/run_local_rollout.py +239 -0
  412. examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
  413. examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
  414. examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
  415. examples/warming_up_to_rl/run_rl_and_save.py +124 -0
  416. examples/warming_up_to_rl/run_rollout_remote.py +156 -0
  417. examples/warming_up_to_rl/task_app/README.md +42 -0
  418. examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
  419. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  420. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  421. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  422. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  423. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  424. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  425. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  426. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  427. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  428. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  429. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  430. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  431. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  432. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
  433. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  434. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
  435. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  436. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
  437. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  438. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
  439. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  440. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  441. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  442. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  443. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
  444. examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
  445. examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
  446. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
  447. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
  448. examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
  449. examples/workflows/math_rl/download_dataset.py +80 -0
  450. examples/workflows/math_rl/run_eval.py +436 -0
  451. examples/workflows/math_rl/run_rl_and_save.py +111 -0
  452. synth_ai/__init__.py +47 -23
  453. synth_ai/_utils/__init__.py +47 -0
  454. synth_ai/_utils/base_url.py +10 -0
  455. synth_ai/_utils/http.py +10 -0
  456. synth_ai/_utils/prompts.py +10 -0
  457. synth_ai/_utils/task_app_state.py +12 -0
  458. synth_ai/_utils/user_config.py +10 -0
  459. synth_ai/api/models/supported.py +514 -0
  460. synth_ai/api/train/__init__.py +63 -0
  461. synth_ai/api/train/builders.py +473 -0
  462. synth_ai/api/train/cli.py +1185 -0
  463. synth_ai/api/train/config_finder.py +246 -0
  464. synth_ai/api/train/configs/__init__.py +65 -0
  465. synth_ai/api/train/configs/prompt_learning.py +496 -0
  466. synth_ai/api/train/configs/rl.py +188 -0
  467. synth_ai/api/train/configs/sft.py +99 -0
  468. synth_ai/api/train/configs/shared.py +81 -0
  469. synth_ai/api/train/env_resolver.py +352 -0
  470. synth_ai/api/train/pollers.py +91 -0
  471. synth_ai/api/train/prompt_learning.py +425 -0
  472. synth_ai/api/train/sft.py +390 -0
  473. synth_ai/api/train/supported_algos.py +147 -0
  474. synth_ai/api/train/task_app.py +195 -0
  475. synth_ai/api/train/utils.py +244 -0
  476. synth_ai/api/train/validators.py +1117 -0
  477. synth_ai/api/tunnel.py +49 -0
  478. synth_ai/auth/credentials.py +94 -0
  479. synth_ai/baseline/__init__.py +25 -0
  480. synth_ai/baseline/config.py +209 -0
  481. synth_ai/baseline/discovery.py +214 -0
  482. synth_ai/baseline/execution.py +146 -0
  483. synth_ai/cfgs.py +227 -0
  484. synth_ai/cli/__init__.py +90 -45
  485. synth_ai/cli/_modal_wrapper.py +31 -0
  486. synth_ai/cli/_storage.py +20 -0
  487. synth_ai/cli/_typer_patch.py +47 -0
  488. synth_ai/cli/_validate_task_app.py +29 -0
  489. synth_ai/cli/balance.py +16 -4
  490. synth_ai/cli/calc.py +36 -21
  491. synth_ai/cli/claude.py +70 -0
  492. synth_ai/cli/codex.py +267 -0
  493. synth_ai/cli/commands/__init__.py +18 -0
  494. synth_ai/cli/commands/baseline/__init__.py +12 -0
  495. synth_ai/cli/commands/baseline/core.py +637 -0
  496. synth_ai/cli/commands/baseline/list.py +93 -0
  497. synth_ai/cli/commands/demo/__init__.py +6 -0
  498. synth_ai/cli/commands/demo/core.py +163 -0
  499. synth_ai/cli/commands/eval/__init__.py +19 -0
  500. synth_ai/cli/commands/eval/core.py +1112 -0
  501. synth_ai/cli/commands/eval/errors.py +81 -0
  502. synth_ai/cli/commands/eval/validation.py +133 -0
  503. synth_ai/cli/commands/filter/__init__.py +12 -0
  504. synth_ai/cli/commands/filter/core.py +424 -0
  505. synth_ai/cli/commands/filter/errors.py +55 -0
  506. synth_ai/cli/commands/filter/validation.py +77 -0
  507. synth_ai/cli/commands/help/__init__.py +185 -0
  508. synth_ai/cli/commands/help/core.py +72 -0
  509. synth_ai/cli/commands/smoke/__init__.py +7 -0
  510. synth_ai/cli/commands/smoke/core.py +1437 -0
  511. synth_ai/cli/commands/status/__init__.py +66 -0
  512. synth_ai/cli/commands/status/client.py +192 -0
  513. synth_ai/cli/commands/status/config.py +92 -0
  514. synth_ai/cli/commands/status/errors.py +20 -0
  515. synth_ai/cli/commands/status/formatters.py +164 -0
  516. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  517. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  518. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  519. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  520. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  521. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  522. synth_ai/cli/commands/status/subcommands/session.py +183 -0
  523. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  524. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  525. synth_ai/cli/commands/status/utils.py +114 -0
  526. synth_ai/cli/commands/train/__init__.py +53 -0
  527. synth_ai/cli/commands/train/core.py +21 -0
  528. synth_ai/cli/commands/train/errors.py +117 -0
  529. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  530. synth_ai/cli/commands/train/judge_validation.py +305 -0
  531. synth_ai/cli/commands/train/validation.py +386 -0
  532. synth_ai/cli/demo.py +32 -140
  533. synth_ai/cli/deploy.py +233 -0
  534. synth_ai/cli/eval/__init__.py +36 -0
  535. synth_ai/cli/eval/core.py +5 -0
  536. synth_ai/cli/eval/errors.py +31 -0
  537. synth_ai/cli/eval/validation.py +5 -0
  538. synth_ai/cli/filter/__init__.py +28 -0
  539. synth_ai/cli/filter/core.py +5 -0
  540. synth_ai/cli/filter/errors.py +23 -0
  541. synth_ai/cli/filter/validation.py +5 -0
  542. synth_ai/cli/legacy_root_backup.py +28 -22
  543. synth_ai/cli/lib/__init__.py +10 -0
  544. synth_ai/cli/lib/task_app_discovery.py +7 -0
  545. synth_ai/cli/lib/task_app_env.py +518 -0
  546. synth_ai/cli/mcp.py +34 -0
  547. synth_ai/cli/modal_serve/__init__.py +12 -0
  548. synth_ai/cli/modal_serve/core.py +14 -0
  549. synth_ai/cli/modal_serve/errors.py +8 -0
  550. synth_ai/cli/modal_serve/validation.py +11 -0
  551. synth_ai/cli/opencode.py +256 -0
  552. synth_ai/cli/recent.py +13 -7
  553. synth_ai/cli/rl_demo.py +166 -114
  554. synth_ai/cli/root.py +143 -112
  555. synth_ai/cli/serve/__init__.py +12 -0
  556. synth_ai/cli/serve/core.py +14 -0
  557. synth_ai/cli/serve/errors.py +8 -0
  558. synth_ai/cli/serve/validation.py +11 -0
  559. synth_ai/cli/setup.py +49 -0
  560. synth_ai/cli/status.py +7 -125
  561. synth_ai/cli/task_app_deploy.py +7 -0
  562. synth_ai/cli/task_app_list.py +25 -0
  563. synth_ai/cli/task_app_modal_serve.py +11 -0
  564. synth_ai/cli/task_app_serve.py +11 -0
  565. synth_ai/cli/task_apps.py +3134 -0
  566. synth_ai/cli/traces.py +9 -5
  567. synth_ai/cli/train/__init__.py +12 -0
  568. synth_ai/cli/train/core.py +21 -0
  569. synth_ai/cli/train/errors.py +8 -0
  570. synth_ai/cli/train/validation.py +24 -0
  571. synth_ai/cli/train.py +5 -0
  572. synth_ai/cli/turso.py +73 -0
  573. synth_ai/cli/watch.py +13 -18
  574. synth_ai/demos/__init__.py +10 -0
  575. synth_ai/demos/core/__init__.py +28 -1
  576. synth_ai/demos/core/cli.py +745 -416
  577. synth_ai/demos/crafter/__init__.py +1 -0
  578. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  579. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  580. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  581. synth_ai/demos/demo_registry.py +176 -0
  582. synth_ai/demos/demo_task_apps/__init__.py +7 -1
  583. synth_ai/demos/demo_task_apps/core.py +75 -37
  584. synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
  585. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
  586. synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
  587. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
  588. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  589. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  590. synth_ai/demos/demo_task_apps/math/config.toml +55 -110
  591. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  592. synth_ai/demos/demo_task_apps/math/modal_task_app.py +491 -166
  593. synth_ai/demos/demo_task_apps/math/task_app_entry.py +37 -0
  594. synth_ai/demos/math/__init__.py +1 -0
  595. synth_ai/demos/math/_common.py +16 -0
  596. synth_ai/demos/math/app.py +38 -0
  597. synth_ai/demos/math/config.toml +76 -0
  598. synth_ai/demos/math/deploy_modal.py +54 -0
  599. synth_ai/demos/math/modal_task_app.py +703 -0
  600. synth_ai/demos/math/task_app_entry.py +51 -0
  601. synth_ai/environments/environment/core.py +7 -1
  602. synth_ai/environments/examples/bandit/engine.py +12 -5
  603. synth_ai/environments/examples/bandit/environment.py +0 -1
  604. synth_ai/environments/examples/bandit/taskset.py +4 -4
  605. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  606. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  607. synth_ai/environments/examples/crafter_classic/environment.py +93 -2
  608. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  609. synth_ai/environments/examples/enron/engine.py +7 -2
  610. synth_ai/environments/examples/enron/environment.py +68 -0
  611. synth_ai/environments/examples/red/engine.py +60 -12
  612. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  613. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  614. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  615. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  616. synth_ai/environments/examples/red/environment.py +86 -0
  617. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  618. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  619. synth_ai/environments/examples/verilog/engine.py +104 -12
  620. synth_ai/environments/examples/wordle/environment.py +0 -1
  621. synth_ai/environments/reproducibility/tree.py +5 -6
  622. synth_ai/environments/service/app.py +11 -12
  623. synth_ai/environments/service/core_routes.py +10 -9
  624. synth_ai/environments/stateful/engine.py +1 -1
  625. synth_ai/environments/tasks/core.py +1 -0
  626. synth_ai/environments/tasks/filters.py +5 -6
  627. synth_ai/environments/tasks/utils.py +4 -5
  628. synth_ai/evals/__init__.py +15 -0
  629. synth_ai/evals/base.py +14 -5
  630. synth_ai/evals/client.py +82 -0
  631. synth_ai/evals/types.py +42 -0
  632. synth_ai/http.py +8 -22
  633. synth_ai/http_client.py +45 -12
  634. synth_ai/inference/__init__.py +0 -2
  635. synth_ai/inference/client.py +21 -7
  636. synth_ai/jobs/client.py +129 -80
  637. synth_ai/judge_schemas.py +127 -0
  638. synth_ai/learning/__init__.py +51 -6
  639. synth_ai/learning/algorithms.py +14 -0
  640. synth_ai/learning/client.py +122 -30
  641. synth_ai/learning/config.py +2 -40
  642. synth_ai/learning/constants.py +0 -2
  643. synth_ai/learning/ft_client.py +4 -56
  644. synth_ai/learning/health.py +14 -8
  645. synth_ai/learning/jobs.py +43 -47
  646. synth_ai/learning/prompt_learning_client.py +276 -0
  647. synth_ai/learning/prompt_learning_types.py +185 -0
  648. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  649. synth_ai/learning/rl/client.py +269 -0
  650. synth_ai/learning/rl/config.py +31 -0
  651. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  652. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  653. synth_ai/learning/rl/secrets.py +13 -0
  654. synth_ai/learning/rl_client.py +2 -253
  655. synth_ai/learning/sft/__init__.py +29 -0
  656. synth_ai/learning/sft/client.py +68 -0
  657. synth_ai/learning/sft/config.py +270 -0
  658. synth_ai/learning/sft/data.py +698 -0
  659. synth_ai/learning/sse.py +25 -26
  660. synth_ai/learning/validators.py +29 -25
  661. synth_ai/mcp/__init__.py +5 -0
  662. synth_ai/mcp/__main__.py +8 -0
  663. synth_ai/mcp/main.py +254 -0
  664. synth_ai/mcp/setup.py +100 -0
  665. synth_ai/modal.py +257 -0
  666. synth_ai/pricing/__init__.py +3 -0
  667. synth_ai/pricing/model_pricing.py +64 -0
  668. synth_ai/session/__init__.py +75 -0
  669. synth_ai/session/client.py +383 -0
  670. synth_ai/session/constants.py +63 -0
  671. synth_ai/session/exceptions.py +105 -0
  672. synth_ai/session/manager.py +139 -0
  673. synth_ai/session/models.py +89 -0
  674. synth_ai/session/query.py +110 -0
  675. synth_ai/spec/__init__.py +46 -0
  676. synth_ai/spec/dataclasses.py +149 -0
  677. synth_ai/spec/loader.py +144 -0
  678. synth_ai/spec/serializer.py +199 -0
  679. synth_ai/spec/validation.py +250 -0
  680. synth_ai/streaming/__init__.py +29 -0
  681. synth_ai/streaming/config.py +94 -0
  682. synth_ai/streaming/handlers.py +589 -0
  683. synth_ai/streaming/streamer.py +320 -0
  684. synth_ai/streaming/types.py +95 -0
  685. synth_ai/task/__init__.py +116 -3
  686. synth_ai/task/apps/__init__.py +132 -0
  687. synth_ai/task/auth.py +165 -0
  688. synth_ai/task/client.py +167 -0
  689. synth_ai/task/config.py +261 -0
  690. synth_ai/task/contracts.py +173 -57
  691. synth_ai/task/datasets.py +108 -0
  692. synth_ai/task/errors.py +50 -0
  693. synth_ai/task/health.py +17 -11
  694. synth_ai/task/inference_api.py +101 -0
  695. synth_ai/task/json.py +111 -0
  696. synth_ai/task/proxy.py +251 -0
  697. synth_ai/task/rubrics/__init__.py +55 -0
  698. synth_ai/task/rubrics/loaders.py +156 -0
  699. synth_ai/task/rubrics/models.py +57 -0
  700. synth_ai/task/rubrics/scoring.py +116 -0
  701. synth_ai/task/rubrics/strict.py +149 -0
  702. synth_ai/task/rubrics.py +219 -0
  703. synth_ai/task/server.py +432 -0
  704. synth_ai/task/trace_correlation_helpers.py +328 -0
  705. synth_ai/task/tracing_utils.py +95 -0
  706. synth_ai/task/validators.py +449 -6
  707. synth_ai/task/vendors.py +59 -0
  708. synth_ai/tracing_v3/__init__.py +4 -0
  709. synth_ai/tracing_v3/abstractions.py +21 -4
  710. synth_ai/tracing_v3/config.py +167 -22
  711. synth_ai/tracing_v3/constants.py +21 -0
  712. synth_ai/tracing_v3/db_config.py +42 -29
  713. synth_ai/tracing_v3/decorators.py +80 -45
  714. synth_ai/tracing_v3/examples/basic_usage.py +15 -9
  715. synth_ai/tracing_v3/hooks.py +6 -4
  716. synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
  717. synth_ai/tracing_v3/migration_helper.py +1 -2
  718. synth_ai/tracing_v3/replica_sync.py +12 -7
  719. synth_ai/tracing_v3/serialization.py +130 -0
  720. synth_ai/tracing_v3/session_tracer.py +86 -21
  721. synth_ai/tracing_v3/storage/base.py +98 -12
  722. synth_ai/tracing_v3/storage/config.py +63 -16
  723. synth_ai/tracing_v3/storage/factory.py +11 -9
  724. synth_ai/tracing_v3/storage/utils.py +15 -11
  725. synth_ai/tracing_v3/trace_utils.py +317 -0
  726. synth_ai/tracing_v3/turso/__init__.py +8 -21
  727. synth_ai/tracing_v3/turso/daemon.py +123 -15
  728. synth_ai/tracing_v3/turso/models.py +5 -2
  729. synth_ai/tracing_v3/turso/native_manager.py +1293 -0
  730. synth_ai/tracing_v3/utils.py +5 -4
  731. synth_ai/tunnel.py +143 -0
  732. synth_ai/tunnel_deploy.py +278 -0
  733. synth_ai/types.py +8 -0
  734. synth_ai/urls.py +11 -0
  735. synth_ai/utils/__init__.py +166 -0
  736. synth_ai/utils/agents.py +74 -0
  737. synth_ai/utils/apps.py +152 -0
  738. synth_ai/utils/base_url.py +94 -0
  739. synth_ai/utils/bin.py +39 -0
  740. synth_ai/utils/claude.py +36 -0
  741. synth_ai/utils/cli.py +284 -0
  742. synth_ai/utils/config.py +81 -0
  743. synth_ai/utils/env.py +346 -0
  744. synth_ai/utils/errors.py +85 -0
  745. synth_ai/utils/http.py +172 -0
  746. synth_ai/utils/json.py +72 -0
  747. synth_ai/utils/log_filter.py +99 -0
  748. synth_ai/utils/logging.py +198 -0
  749. synth_ai/utils/modal.py +299 -0
  750. synth_ai/utils/paths.py +95 -0
  751. synth_ai/utils/process.py +233 -0
  752. synth_ai/utils/prompts.py +39 -0
  753. synth_ai/utils/sqld.py +122 -0
  754. synth_ai/utils/ssl.py +25 -0
  755. synth_ai/utils/task_app_discovery.py +882 -0
  756. synth_ai/utils/task_app_env.py +186 -0
  757. synth_ai/utils/task_app_state.py +318 -0
  758. synth_ai/utils/tunnel/__init__.py +12 -0
  759. synth_ai/utils/tunnel/config.py +55 -0
  760. synth_ai/utils/user_config.py +137 -0
  761. synth_ai/uvicorn.py +77 -0
  762. synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
  763. synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
  764. {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
  765. {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
  766. synth_ai/cli/man.py +0 -106
  767. synth_ai/core/experiment.py +0 -15
  768. synth_ai/core/system.py +0 -15
  769. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  770. synth_ai/experimental/synth_oss.py +0 -446
  771. synth_ai/handshake.py +0 -63
  772. synth_ai/install_sqld.sh +0 -40
  773. synth_ai/learning/offline/dpo.py +0 -0
  774. synth_ai/learning/offline/providers.py +0 -7
  775. synth_ai/learning/offline/sft.py +0 -0
  776. synth_ai/learning/offline/shared.py +0 -0
  777. synth_ai/learning/online/grpo.py +0 -0
  778. synth_ai/learning/online/irft.py +0 -0
  779. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  780. synth_ai/learning/prompts/gepa.py +0 -0
  781. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  782. synth_ai/learning/prompts/mipro.py +0 -289
  783. synth_ai/learning/prompts/random_search.py +0 -246
  784. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  785. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  786. synth_ai/lm/__init__.py +0 -51
  787. synth_ai/lm/caching/constants.py +0 -6
  788. synth_ai/lm/caching/dbs.py +0 -0
  789. synth_ai/lm/caching/ephemeral.py +0 -102
  790. synth_ai/lm/caching/handler.py +0 -137
  791. synth_ai/lm/caching/initialize.py +0 -11
  792. synth_ai/lm/caching/persistent.py +0 -114
  793. synth_ai/lm/config.py +0 -110
  794. synth_ai/lm/constants.py +0 -32
  795. synth_ai/lm/core/__init__.py +0 -8
  796. synth_ai/lm/core/all.py +0 -73
  797. synth_ai/lm/core/exceptions.py +0 -7
  798. synth_ai/lm/core/main.py +0 -319
  799. synth_ai/lm/core/main_v3.py +0 -594
  800. synth_ai/lm/core/synth_models.py +0 -48
  801. synth_ai/lm/core/vendor_clients.py +0 -188
  802. synth_ai/lm/cost/monitor.py +0 -1
  803. synth_ai/lm/cost/statefulness.py +0 -1
  804. synth_ai/lm/injection.py +0 -80
  805. synth_ai/lm/overrides.py +0 -206
  806. synth_ai/lm/provider_support/__init__.py +0 -8
  807. synth_ai/lm/provider_support/anthropic.py +0 -972
  808. synth_ai/lm/provider_support/openai.py +0 -1139
  809. synth_ai/lm/provider_support/suppress_logging.py +0 -31
  810. synth_ai/lm/structured_outputs/handler.py +0 -440
  811. synth_ai/lm/structured_outputs/inject.py +0 -297
  812. synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
  813. synth_ai/lm/tools/__init__.py +0 -3
  814. synth_ai/lm/tools/base.py +0 -172
  815. synth_ai/lm/unified_interface.py +0 -202
  816. synth_ai/lm/vendors/base.py +0 -81
  817. synth_ai/lm/vendors/core/anthropic_api.py +0 -387
  818. synth_ai/lm/vendors/core/gemini_api.py +0 -292
  819. synth_ai/lm/vendors/core/mistral_api.py +0 -322
  820. synth_ai/lm/vendors/core/openai_api.py +0 -225
  821. synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
  822. synth_ai/lm/vendors/local/ollama.py +0 -0
  823. synth_ai/lm/vendors/openai_standard.py +0 -780
  824. synth_ai/lm/vendors/openai_standard_responses.py +0 -256
  825. synth_ai/lm/vendors/retries.py +0 -22
  826. synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
  827. synth_ai/lm/vendors/supported/deepseek.py +0 -69
  828. synth_ai/lm/vendors/supported/grok.py +0 -75
  829. synth_ai/lm/vendors/supported/groq.py +0 -16
  830. synth_ai/lm/vendors/supported/ollama.py +0 -15
  831. synth_ai/lm/vendors/supported/openrouter.py +0 -74
  832. synth_ai/lm/vendors/supported/together.py +0 -11
  833. synth_ai/lm/vendors/synth_client.py +0 -808
  834. synth_ai/lm/warmup.py +0 -186
  835. synth_ai/rl/secrets.py +0 -19
  836. synth_ai/scripts/verify_rewards.py +0 -100
  837. synth_ai/tracing/__init__.py +0 -30
  838. synth_ai/tracing_v1/__init__.py +0 -33
  839. synth_ai/tracing_v3/turso/manager.py +0 -760
  840. synth_ai/v0/tracing/abstractions.py +0 -224
  841. synth_ai/v0/tracing/base_client.py +0 -91
  842. synth_ai/v0/tracing/client_manager.py +0 -131
  843. synth_ai/v0/tracing/config.py +0 -142
  844. synth_ai/v0/tracing/context.py +0 -146
  845. synth_ai/v0/tracing/decorators.py +0 -682
  846. synth_ai/v0/tracing/events/__init__.py +0 -0
  847. synth_ai/v0/tracing/events/manage.py +0 -147
  848. synth_ai/v0/tracing/events/scope.py +0 -86
  849. synth_ai/v0/tracing/events/store.py +0 -228
  850. synth_ai/v0/tracing/immediate_client.py +0 -151
  851. synth_ai/v0/tracing/local.py +0 -18
  852. synth_ai/v0/tracing/log_client_base.py +0 -73
  853. synth_ai/v0/tracing/retry_queue.py +0 -186
  854. synth_ai/v0/tracing/trackers.py +0 -515
  855. synth_ai/v0/tracing/upload.py +0 -512
  856. synth_ai/v0/tracing/utils.py +0 -9
  857. synth_ai/v0/tracing_v1/__init__.py +0 -16
  858. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  859. synth_ai/v0/tracing_v1/base_client.py +0 -91
  860. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  861. synth_ai/v0/tracing_v1/config.py +0 -142
  862. synth_ai/v0/tracing_v1/context.py +0 -146
  863. synth_ai/v0/tracing_v1/decorators.py +0 -703
  864. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  865. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  866. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  867. synth_ai/v0/tracing_v1/events/store.py +0 -228
  868. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  869. synth_ai/v0/tracing_v1/local.py +0 -18
  870. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  871. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  872. synth_ai/v0/tracing_v1/trackers.py +0 -515
  873. synth_ai/v0/tracing_v1/upload.py +0 -527
  874. synth_ai/v0/tracing_v1/utils.py +0 -9
  875. synth_ai/zyk/__init__.py +0 -30
  876. synth_ai-0.2.8.dev4.dist-info/METADATA +0 -129
  877. synth_ai-0.2.8.dev4.dist-info/RECORD +0 -420
  878. {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
  879. {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
  880. {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
  881. {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
  882. {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
  883. {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
  884. {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
  885. {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
  886. /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
  887. /synth_ai/{learning/filtering.py → py.typed} +0 -0
  888. {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
  889. {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1055 @@
1
+ """Task App configuration for the GRPO Crafter example."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import json
6
+ import logging
7
+ import os
8
+ import sys
9
+ from urllib.parse import parse_qs, urlparse
10
+ from collections.abc import Iterable, Sequence
11
+ from contextlib import suppress
12
+ from dataclasses import dataclass
13
+ from datetime import UTC, datetime
14
+ from pathlib import Path
15
+ from typing import Any
16
+
17
+ from fastapi import HTTPException
18
+ from pydantic import BaseModel
19
+
20
+ from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
21
+ from synth_ai.task.contracts import RolloutMetrics, RolloutMode, RolloutRequest, RolloutResponse, TaskInfo
22
+ from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
23
+ from synth_ai.task.json import to_jsonable # noqa: F401 (imported for side-effect compatibility)
24
+ from synth_ai.task.rubrics import load_rubric
25
+ from synth_ai.task.server import ProxyConfig, RubricBundle, TaskAppConfig
26
+ from synth_ai.task.validators import normalize_inference_url
27
+ from synth_ai.task.tracing_utils import (
28
+ build_tracer_factory,
29
+ resolve_sft_output_dir,
30
+ resolve_tracing_db_url,
31
+ tracing_env_enabled,
32
+ )
33
+ from synth_ai.tracing_v3.session_tracer import SessionTracer
34
+
35
try:
    from .synth_envs_hosted.utils import (
        ensure_chat_completions_url,
        extract_trace_correlation_id,
    )
except Exception:  # pragma: no cover - utils unavailable if optional deps missing

    def ensure_chat_completions_url(raw_url, mode=None):
        """Fallback to shared utility for URL normalization.

        Falsy inputs (``None``, ``""``) are handed back untouched; anything
        else goes through ``normalize_inference_url``. ``mode`` is accepted
        for signature compatibility and is not used here.
        """
        if not raw_url:
            return raw_url
        return normalize_inference_url(raw_url)

    def extract_trace_correlation_id(_raw_url, mode=None):
        """Return the first non-blank trace id found in the URL's query string.

        The query keys ``cid``, ``trace`` and ``trace_correlation_id`` are
        checked in that order. Returns ``None`` for non-string input or when
        no key carries a non-blank value. ``mode`` is accepted for signature
        compatibility and is not used here.
        """
        if not isinstance(_raw_url, str):
            return None
        params = parse_qs(urlparse(_raw_url).query or "")
        for name in ("cid", "trace", "trace_correlation_id"):
            for candidate in params.get(name) or []:
                if not isinstance(candidate, str):
                    continue
                trimmed = candidate.strip()
                if trimmed:
                    return trimmed
        return None
56
# Module-level logger named after this module.
logger = logging.getLogger(__name__)

# Twenty entries alternating "agent" and "env", starting with "agent".
DEFAULT_ALIAS_OPS: list[str] = [op for _ in range(10) for op in ("agent", "env")]
# Defaults keyed by step-reward option name (consumers are outside this chunk).
DEFAULT_ALIAS_STEP_REWARDS: dict[str, Any] = dict(
    enabled=True,
    mode="decision_stepwise",
    indicator_lambda=1.0,
    step_beta=0.0,
)

# Absolute, symlink-resolved location of this module file.
_HERE = Path(__file__).resolve()
67
+
68
+
69
def _resolve_repo_root() -> Path:
    """Locate the Synth AI repository root on a best-effort basis.

    Probes, in order: the ``SYNTH_AI_REPO_ROOT`` environment variable (when
    set), ``/opt/synth_ai_repo``, then this module's directory and each of its
    ancestors. The first existing location that holds a ``pyproject.toml``, a
    ``uv.lock``, or a ``synth_ai`` directory is returned. When nothing
    matches, ``_HERE.parents[3]`` is used, or this file's own directory if the
    path is too shallow for that.
    """

    def _looks_like_root(location: Path) -> bool:
        # Project metadata files take precedence over the package directory.
        markers = (location / "pyproject.toml", location / "uv.lock")
        if any(marker.exists() for marker in markers):
            return True
        return (location / "synth_ai").is_dir()

    probes: list[Path] = []
    configured = os.getenv("SYNTH_AI_REPO_ROOT")
    if configured:
        probes.append(Path(configured).expanduser())
    probes.append(Path("/opt/synth_ai_repo"))
    probes += [_HERE.parent, *_HERE.parents]

    for probe in probes:
        try:
            location = probe.resolve()
        except Exception:
            # An unresolvable candidate is skipped, not treated as fatal.
            continue
        if location.exists() and _looks_like_root(location):
            return location

    ancestors = _HERE.parents
    return ancestors[3] if len(ancestors) > 3 else _HERE.parent
95
+
96
+
97
def _resolve_task_app_root(repo_root: Path) -> Path:
    """Find the ``task_app`` directory, including when this module was copied elsewhere.

    Lookup order:
      1. ``<repo_root>/examples/task_apps/crafter/task_app`` if it is a directory.
      2. This module's own directory, if it contains ``synth_envs_hosted``.
      3. The nearest ancestor of this file that contains ``synth_envs_hosted``.
      4. ``/opt/synth_ai_repo/examples/task_apps/crafter/task_app`` if it is a directory.
    If none of these apply, this module's own directory is returned.
    """
    in_repo = (repo_root / "examples" / "task_apps" / "crafter" / "task_app").resolve()
    if in_repo.is_dir():
        return in_repo

    module_dir = _HERE.parent.resolve()
    if (module_dir / "synth_envs_hosted").is_dir():
        return module_dir

    for ancestor in _HERE.parents:
        ancestor_dir = ancestor.resolve()
        if (ancestor_dir / "synth_envs_hosted").is_dir():
            return ancestor_dir

    modal_mount = Path("/opt/synth_ai_repo/examples/task_apps/crafter/task_app")
    return modal_mount.resolve() if modal_mount.is_dir() else module_dir
118
+
119
+
120
# Filesystem roots computed once at import time; the later ones derive from the first two.
REPO_ROOT = _resolve_repo_root()
TASK_APP_ROOT = _resolve_task_app_root(REPO_ROOT)
SYNTH_ENVS_HOSTED_ROOT = TASK_APP_ROOT.joinpath("synth_envs_hosted").resolve()

EXAMPLES_ROOT = REPO_ROOT.joinpath("examples").resolve()
RUBRICS_ROOT = EXAMPLES_ROOT.joinpath("multi_step", "rubrics").resolve()
126
+
127
# Inline rubric for judging a whole episode's outcome. It has five criteria
# combined by "weighted_sum"; the weights add up to 1.0.
# NOTE(review): presumably passed as the `fallback` argument of
# `_load_rubric_with_fallback`; the call site is outside this chunk - confirm.
DEFAULT_OUTCOME_RUBRIC_DATA: dict[str, Any] = {
    "version": "1",
    "goal_text": (
        "Reward episodes that climb the Crafter achievement ladder, stockpile key resources "
        "(especially wood), and finish alive with clear understanding of any failure."
    ),
    "aggregation": "weighted_sum",
    "criteria": [
        {
            "id": "achievement_progression",
            "description": (
                "Weigh achievements by tier: late-game unlocks (iron tools, furnace, armor) earn "
                "the most, mid-tier crafting (stone tools, furnace prep) gets partial credit, early "
                "tasks (collecting saplings/wood tools) only lightly scored."
            ),
            "weight": 0.35,
        },
        {
            "id": "resource_stockpile",
            "description": (
                "Assess resource totals with emphasis on wood stores; high scores require abundant "
                "wood plus supporting materials (stone, coal, iron) that signal readiness for "
                "crafting."
            ),
            "weight": 0.2,
        },
        {
            "id": "survival_state",
            "description": (
                "Reward finishing alive with healthy food/drink bars and safe positioning; penalize "
                "deaths, low vitals, or lingering hazards at episode end."
            ),
            "weight": 0.2,
        },
        {
            "id": "failure_analysis",
            "description": (
                "If the run ends in death or timeout, clearly identify the cause and deduct unless "
                "the agent mitigated risk; highlight when the agent survives despite danger."
            ),
            "weight": 0.15,
        },
        {
            "id": "future_readiness",
            "description": (
                "Describe how prepared the agent is for the next objectives (tools crafted, shelters, "
                "furnaces, smelted materials) and whether the inventory supports further progress."
            ),
            "weight": 0.1,
        },
    ],
}
179
+
180
# Inline rubric for scoring individual decisions (events). It has four
# criteria combined by "weighted_sum"; the weights add up to 1.0.
# NOTE(review): presumably passed as the `fallback` argument of
# `_load_rubric_with_fallback`; the call site is outside this chunk - confirm.
DEFAULT_EVENTS_RUBRIC_DATA: dict[str, Any] = {
    "version": "1",
    "goal_text": (
        "Score each decision in proportion to the concrete Crafter achievement progress it "
        "delivers, topping out the scale when the log shows a fresh achievement unlock and keeping "
        "routine upkeep near zero."
    ),
    "aggregation": "weighted_sum",
    "criteria": [
        {
            "id": "achievement_unlocks",
            "description": (
                "Assign 0.9-1.0 when the decision explicitly unlocks a new Crafter achievement (look "
                'for "Achievement unlocked" messages or equivalent deterministic completions such as '
                "placing a furnace that immediately crafts ingots). Cap the score at 0.4 when no new "
                "achievement fires, and drop to <=0.1 if the turn repeats known actions without "
                "measurable progress."
            ),
            "weight": 0.55,
        },
        {
            "id": "milestone_setup",
            "description": (
                "Give 0.5-0.7 when the action completes the last prerequisite for a specific upcoming "
                "achievement (e.g., gathering the final ore before smelting, crafting sticks right "
                "before a tool). Keep the score <=0.3 if the progress is speculative or still several "
                "steps away."
            ),
            "weight": 0.2,
        },
        {
            "id": "inventory_depth",
            "description": (
                "Reward 0.3-0.5 for pulls that clearly deepen critical buffers (fuel, food, ore) and "
                "immediately unblock the next milestone. If resources are already plentiful or the "
                "haul is generic filler, stay at <=0.2."
            ),
            "weight": 0.15,
        },
        {
            "id": "execution_quality",
            "description": (
                "Only add up to 0.1 for clean, legal execution that avoids wasted turns; drop to 0.0 "
                "whenever the agent idles, repeats failed moves, or takes damage without compensating "
                "progress."
            ),
            "weight": 0.1,
        },
    ],
}
230
+
231
# Make the repo, task app, hosted-envs and examples directories importable.
# Each path is inserted at index 0, so a later entry in the tuple ends up
# ahead of the earlier ones on sys.path.
for path in (REPO_ROOT, TASK_APP_ROOT, SYNTH_ENVS_HOSTED_ROOT, EXAMPLES_ROOT):
    try:
        resolved = path.resolve()
    except Exception:
        # Keep the unresolved path as the candidate instead of skipping it.
        resolved = path
    if resolved.exists():
        path_str = str(resolved)
        if path_str not in sys.path:  # do not add the same entry twice
            sys.path.insert(0, path_str)

# Fallback: explicitly add Modal mount path for 'examples' if REPO_ROOT detection fails
try:
    _hard_examples = Path("/opt/synth_ai_repo/examples")
    if _hard_examples.exists():
        _hard_examples_str = str(_hard_examples.resolve())
        if _hard_examples_str not in sys.path:
            sys.path.insert(0, _hard_examples_str)
except Exception:
    # Best-effort only: any failure while probing the mount is ignored.
    pass
250
+
251
def _load_rubric_with_fallback(filename: str, fallback: dict[str, Any]):
    """Load a rubric from disk, or from the bundled ``fallback`` mapping.

    ``RUBRICS_ROOT / filename`` is tried first, then
    ``TASK_APP_ROOT / "rubrics" / filename``. A file that exists but fails to
    load is logged as a warning and the next location is tried. When neither
    location yields a rubric, a warning is logged, the fallback is written as
    JSON to the first location (failures there are only logged at debug
    level), and the rubric is built from ``fallback`` directly.

    Args:
        filename: Rubric file name, relative to each search directory.
        fallback: Inline rubric data used when no file could be loaded.

    Returns:
        Whatever ``load_rubric`` returns for the chosen source.
    """
    primary = RUBRICS_ROOT / filename
    secondary = TASK_APP_ROOT / "rubrics" / filename
    for location in (primary, secondary):
        try:
            if not location.exists():
                continue
            logger.debug("Loading rubric from %s", location)
            return load_rubric(str(location))
        except Exception as exc:
            logger.warning("Failed to load rubric %s from %s: %s", filename, location, exc)

    logger.warning("Falling back to inline rubric %s: file not available", filename)
    try:
        # Write the inline rubric to the first search location as JSON.
        primary.parent.mkdir(parents=True, exist_ok=True)
        primary.write_text(json.dumps(fallback, indent=2), encoding="utf-8")
    except Exception:
        logger.debug("Unable to materialize inline rubric %s", filename, exc_info=True)
    return load_rubric(fallback)
271
+
272
+
273
# Import the Crafter runtime and the hosted environment service. When only the
# optional ``synth_envs_hosted`` package is missing, hosted features are gated
# off via HAS_HOSTED; any other import failure is re-raised with a fix hint.
HAS_HOSTED = True
try:
    import crafter  # type: ignore
    import crafter.constants as crafter_constants  # type: ignore
    from synth_ai.environments.examples.crafter_classic.taskset import TRAIT_BOUNDS
    from synth_envs_hosted.branching import router as branching_router  # type: ignore
    from synth_envs_hosted.environment_routes import router as environment_router  # type: ignore
    from synth_envs_hosted.hosted_app import TaskApp as HostedTaskApp  # type: ignore
    from synth_envs_hosted.policy_routes import router as policy_router  # type: ignore
    from synth_envs_hosted.rollout import (  # type: ignore
        RolloutEnvSpec as LegacyRolloutEnvSpec,
        RolloutPolicySpec as LegacyRolloutPolicySpec,
        RolloutRecordConfig as LegacyRolloutRecordConfig,
        RolloutRequest as LegacyRolloutRequest,
        RolloutResponse as LegacyRolloutResponse,
        RolloutSafetyConfig as LegacyRolloutSafetyConfig,
        execute_rollout as legacy_execute_rollout,
    )
except Exception as exc:  # pragma: no cover - import-time validation
    # Provide a more actionable error with the missing module and fix hints
    missing_mod = None
    if isinstance(exc, ModuleNotFoundError):
        # Prefer the structured ``name`` attribute; only parse the message when
        # it is unavailable. The parentheses matter: without them the
        # conditional expression wraps the whole ``or`` and discards ``name``
        # whenever the message contains no quote.
        missing_mod = getattr(exc, "name", None) or (
            str(exc).split("'")[1] if "'" in str(exc) else None
        )
    fix_hint = None
    if missing_mod:
        # Import name -> pip distribution name, where the two are known here.
        mapping = {
            "dotenv": "python-dotenv",
            "crafter": "crafter",
            "httpx": "httpx",
            "aiohttp": "aiohttp",
            "fastapi": "fastapi",
            "uvicorn": "uvicorn",
            "sqlalchemy": "sqlalchemy",
            "aiosqlite": "aiosqlite",
            "greenlet": "greenlet",
        }
        pkg = mapping.get(missing_mod, missing_mod)
        fix_hint = (
            f"Missing Python module '{missing_mod}'. Install the package '{pkg}'.\n"
            f"For Modal: add '{pkg}' to ModalDeploymentConfig.pip_packages in synth_ai/task/apps/grpo_crafter.py.\n"
            f"Locally: pip install {pkg}"
        )
    # Allow running without synth_envs_hosted; gate hosted features off
    if missing_mod == "synth_envs_hosted":
        HAS_HOSTED = False
    else:
        detailed = (
            "grpo_crafter task app requires example dependencies and runtime libs.\n"
            + (fix_hint + "\n" if fix_hint else "")
            + f"Original error: {exc}"
        )
        raise RuntimeError(detailed) from exc
339
+
340
+
341
# Newline-separated summary of the Crafter crafting prerequisites (one rule per
# line, no trailing newline).
CRAFTING_RULES_SYSTEM_HINT = "\n".join(
    (
        "Crafter crafting rules (from the paper):",
        "- Make Wood Pickaxe: Nearby a table; have wood in inventory.",
        "- Make Stone Pickaxe: Nearby a table; have wood and stone in inventory.",
        "- Make Iron Pickaxe: Nearby a table; furnace exists; have wood, coal, and iron in inventory.",
        "- Make Wood Sword: Nearby a table; have wood in inventory.",
        "- Make Stone Sword: Nearby a table; have wood and stone in inventory.",
        "- Make Iron Sword: Nearby a table; furnace exists; have wood, coal, and iron in inventory.",
    )
)
350
+
351
+
352
# Static description of the procedural Crafter seed dataset; it exposes a
# single "train" split, which is also the default.
DATASET_SPEC = TaskDatasetSpec(
    id="crafter_classic_procedural",
    name="Crafter Classic Procedural Seeds",
    version="1.0.0",
    description="Procedural Crafter Classic seeds with reproducible world traits.",
    splits=["train"],
    default_split="train",
)
360
+
361
+
362
@dataclass
class CrafterDataset:
    """Seed-addressed dataset of Crafter worlds.

    Settings (default seed, maximum seed, world area, episode length) are read
    from ``CRAFTER_*`` environment variables when the instance is created.
    Per-seed summaries are computed on demand and kept in ``_cache``.
    """

    spec: TaskDatasetSpec

    def __post_init__(self) -> None:
        """Read the dataset settings from the environment and create the cache."""
        self.default_seed = int(env_value("CRAFTER_DEFAULT_SEED", 42))
        self.seed_min = 0
        self.seed_max = int(env_value("CRAFTER_MAX_SEED", 2**31 - 1))
        # CRAFTER_AREA is a comma-separated list of integers, e.g. "64,64".
        raw_area = str(env_value("CRAFTER_AREA", "64,64"))
        self.area = tuple(int(part) for part in raw_area.split(","))
        self.length = int(env_value("CRAFTER_EPISODE_LENGTH", 10000))
        self._cache: dict[int, dict[str, Any]] = {}

    def config_for_seed(self, seed: int) -> dict[str, Any]:
        """Return the environment configuration (seed, area, length) for ``seed``."""
        config: dict[str, Any] = {"seed": int(seed)}
        config["area"] = list(self.area)
        config["length"] = self.length
        return config

    def describe_seed(self, seed: int) -> dict[str, Any]:
        """Return a summary of the world generated for ``seed``.

        The first call for a seed builds a ``crafter.Env``, resets it, and
        records world traits, the player's inventory and position; the
        environment is closed afterwards. Later calls return the cached
        summary object.
        """
        seed = int(seed)
        cached = self._cache.get(seed)
        if cached is not None:
            return cached

        env = crafter.Env(area=self.area, length=self.length, seed=seed)
        try:
            env.reset()
            traits = _compute_world_traits(env)
            player = getattr(env, "_player", None)
            if player:
                inventory = dict(getattr(player, "inventory", {}))
            else:
                inventory = {}
            position = getattr(player, "pos", None)
        finally:
            # Close the environment when it offers a close() method.
            close_fn = getattr(env, "close", None)
            if callable(close_fn):
                close_fn()

        player_position = None if position is None else list(position)
        summary = {
            "seed": seed,
            "difficulty": self._difficulty(traits),
            "traits": traits,
            "inventory": inventory,
            "player_position": player_position,
            "config": self.config_for_seed(seed),
        }
        self._cache[seed] = summary
        return summary

    def _difficulty(self, traits: dict[str, int]) -> str:
        """Return the first ``TRAIT_BOUNDS`` label whose bounds ``traits`` satisfy.

        A label matches when there are at least ``min_trees`` trees and at most
        ``max_hostiles`` hostiles; ``"custom"`` is returned when none match.
        """
        matching = (
            label
            for label, bounds in TRAIT_BOUNDS.items()
            if traits.get("trees", 0) >= bounds.get("min_trees", 0)
            and traits.get("hostiles", 0) <= bounds.get("max_hostiles", 0)
        )
        return next(matching, "custom")

    @property
    def seed_range(self) -> list[int]:
        """Two-element list ``[seed_min, seed_max]``."""
        return [self.seed_min, self.seed_max]
419
+
420
+
421
def _compute_world_traits(env: crafter.Env, radius: int = 10) -> dict[str, int]:
    """Count trees, cows and hostiles near the player.

    An object counts when the sum of absolute coordinate differences between
    it and the player is at most ``radius``. Objects whose distance cannot be
    computed are ignored. All counts are zero when the environment has no
    player.
    """
    # Local copy to avoid import-time issues; mirrors synth_ai.environments.examples.crafter_classic.taskset.world_traits
    import numpy as _np  # type: ignore
    from crafter import objects as _objects  # type: ignore

    counts = {"trees": 0, "cows": 0, "hostiles": 0}
    player = getattr(env, "_player", None)
    if player is None:
        return counts

    origin = _np.array(getattr(player, "pos", [0, 0]))
    world = getattr(env, "_world", None)
    world_objects = [] if world is None else getattr(world, "_objects", [])
    hostile_types = (_objects.Zombie, _objects.Skeleton)

    for obj in world_objects:
        if obj is None or obj is player:
            continue
        try:
            out_of_range = bool(_np.abs(obj.pos - origin).sum() > radius)
        except Exception:
            # No usable position on this object; skip it.
            continue
        if out_of_range:
            continue
        if isinstance(obj, _objects.Plant) and getattr(obj, "kind", "") == "tree":
            counts["trees"] += 1
        elif isinstance(obj, _objects.Cow):
            counts["cows"] += 1
        elif isinstance(obj, hostile_types):
            counts["hostiles"] += 1
    return counts
448
+
449
+
450
def env_value(key: str, default: Any) -> Any:
    """Return the environment variable ``key``, or ``default`` when it is unset."""
    return os.environ.get(key, default)
452
+
453
+
454
def build_dataset() -> tuple[TaskDatasetRegistry, CrafterDataset]:
    """Create the Crafter dataset and a registry that serves it.

    Returns:
        ``(registry, dataset)``, where ``registry`` has ``DATASET_SPEC``
        registered with a loader that always returns ``dataset``.
    """
    registry = TaskDatasetRegistry()
    dataset = CrafterDataset(DATASET_SPEC)

    def _loader(_spec):
        # The same dataset instance is returned regardless of the spec passed in.
        return dataset

    registry.register(DATASET_SPEC, _loader, cache=True)
    return registry, dataset
459
+
460
+
461
def _base_task_info(dataset: CrafterDataset) -> TaskInfo:
    """Build the seed-independent ``TaskInfo`` for the Crafter task.

    The action space is taken from ``crafter_constants.actions``; the dataset
    section combines ``DATASET_SPEC`` with the dataset's seed range and
    default seed.
    """
    action_names = crafter_constants.actions
    action_count = len(action_names)

    action_space = {
        "type": "discrete",
        "description": f"Discrete action space with {action_count} actions including movement, crafting, and interaction",
        "size": action_count,
        "actions": list(action_names),
    }
    observation = {
        "type": "dict",
        "description": "RGB frame (64x64x3) plus inventory counts, achievements, and semantic map patches",
        "summary": "RGB frame plus inventory, achievements, and semantic map patches.",
        "keys": ["image", "inventory", "achievements", "semantic_map_patch7"],
        "image_shape": [64, 64, 3],
    }
    dataset_info = dict(DATASET_SPEC.model_dump())
    dataset_info["seed_range"] = dataset.seed_range
    dataset_info["default_seed"] = dataset.default_seed
    rubric = {
        "version": "1",
        "criteria_count": 2,
        "source": "inline",
        "aggregation": "weighted_sum",
    }
    inference = {
        "supports_proxy": True,
        "endpoints": {
            "openai": "/proxy/v1/chat/completions",
            "groq": "/proxy/groq/v1/chat/completions",
        },
        "tool": {"name": "interact", "parallel_tool_calls": False},
    }

    return TaskInfo(
        task={"id": "crafter_classic", "name": "Crafter Classic", "version": "1.0.0"},
        environment="crafter",
        action_space=action_space,
        observation=observation,
        dataset=dataset_info,
        rubric=rubric,
        inference=inference,
        limits={"max_ops": 100000, "max_time_s": 3600},
    )
499
+
500
+
501
# Rubrics are loaded once at import time; each falls back to its bundled
# default when no rubric file can be loaded.
OUTCOME_RUBRIC = _load_rubric_with_fallback("crafter_outcome_rubric.json", DEFAULT_OUTCOME_RUBRIC_DATA)

EVENTS_RUBRIC = _load_rubric_with_fallback("crafter_events_rubric.json", DEFAULT_EVENTS_RUBRIC_DATA)
508
+
509
+
510
def describe_taskset(dataset: CrafterDataset) -> dict[str, Any]:
    """Return ``DATASET_SPEC`` as a dict, extended with seed and world settings."""
    description: dict[str, Any] = dict(DATASET_SPEC.model_dump())
    description["seed_range"] = dataset.seed_range
    description["default_seed"] = dataset.default_seed
    description["config"] = {
        "area": list(dataset.area),
        "length": dataset.length,
    }
    return description
520
+
521
+
522
def provide_task_instances(
    dataset: CrafterDataset, base_info: TaskInfo, seeds: Sequence[int]
) -> Iterable[TaskInfo]:
    """Build one ``TaskInfo`` per seed by specialising ``base_info``.

    Each instance copies the base observation description and adds the seed,
    world traits, starting inventory and player position. Its dataset section
    gains the seed, difficulty label and environment config. Task, environment,
    action space, rubric, inference and limits are reused from ``base_info``.
    """
    # Normalise the base observation to a plain dict, whatever form it came in.
    base_observation = getattr(base_info, "observation", None)
    if hasattr(base_observation, "model_dump"):
        observation_template = base_observation.model_dump()
    elif isinstance(base_observation, dict):
        observation_template = dict(base_observation)
    else:
        observation_template = {}

    def _instance_for(seed_value: int) -> TaskInfo:
        summary = dataset.describe_seed(seed_value)

        observation = dict(observation_template)
        observation["seed"] = seed_value
        observation["traits"] = summary["traits"]
        observation["inventory"] = summary["inventory"]
        observation["player_position"] = summary["player_position"]

        dataset_info = dict(base_info.dataset.model_dump())
        dataset_info["seed"] = seed_value
        dataset_info["difficulty"] = summary["difficulty"]
        dataset_info["config"] = summary["config"]

        return TaskInfo(
            task=base_info.task,
            environment=base_info.environment,
            action_space=base_info.action_space,
            observation=observation,
            dataset=dataset_info,
            rubric=base_info.rubric,
            inference=base_info.inference,
            limits=base_info.limits,
        )

    infos: list[TaskInfo] = [_instance_for(seed_value) for seed_value in seeds]
    return infos
560
+
561
+
562
+ def _normalise_op(op_value: Any, index: int) -> str:
563
+ if isinstance(op_value, str):
564
+ candidate = op_value
565
+ elif isinstance(op_value, dict):
566
+ candidate = op_value.get("type") or op_value.get("op")
567
+ else:
568
+ candidate = None
569
+ if not candidate:
570
+ raise ValueError(f"Missing op type at index {index}")
571
+ lowered = str(candidate).strip().lower()
572
+ if lowered in {"policy", "agent", "model"}:
573
+ return "agent"
574
+ if lowered in {"env", "environment", "step"}:
575
+ return "env"
576
+ raise ValueError(f"Unsupported op type '{candidate}' at index {index}")
577
+
578
+
579
+ def _coerce_math_to_crafter(request: RolloutRequest) -> RolloutRequest:
580
+ """Map legacy math env/policy names to crafter and enrich rollout defaults."""
581
+
582
+ def _needs_crafter(name: str | None) -> bool:
583
+ if not name:
584
+ return False
585
+ lowered = str(name).strip().lower()
586
+ return lowered.startswith("math")
587
+
588
+ env_updates: dict[str, Any] = {}
589
+ policy_updates: dict[str, Any] = {}
590
+ alias_applied = False
591
+
592
+ if _needs_crafter(request.env.env_name):
593
+ env_updates["env_name"] = "crafter"
594
+ alias_applied = True
595
+ if request.env.env_id and _needs_crafter(request.env.env_id):
596
+ env_updates["env_id"] = None
597
+ alias_applied = True
598
+ if _needs_crafter(request.policy.policy_name):
599
+ policy_updates["policy_name"] = "crafter-react"
600
+ alias_applied = True
601
+ if request.policy.policy_id and _needs_crafter(request.policy.policy_id):
602
+ policy_updates["policy_id"] = None
603
+ alias_applied = True
604
+
605
+ if not alias_applied:
606
+ return request
607
+
608
+ updated_env = request.env.model_copy(update=env_updates) if env_updates else request.env
609
+ updated_policy = (
610
+ request.policy.model_copy(update=policy_updates) if policy_updates else request.policy
611
+ )
612
+
613
+ env_cfg = dict(updated_env.config or {})
614
+ env_cfg.setdefault("difficulty", "normal")
615
+ env_cfg.setdefault("step_rewards", dict(DEFAULT_ALIAS_STEP_REWARDS))
616
+ env_cfg.setdefault("env_params", {"max_steps_per_episode": 200})
617
+ updated_env = updated_env.model_copy(update={"config": env_cfg})
618
+
619
+ policy_cfg = dict(updated_policy.config or {})
620
+ policy_cfg.setdefault("max_llm_calls", 10)
621
+ policy_cfg.setdefault("max_completion_tokens", 1024)
622
+ policy_cfg.setdefault("temperature", 0.2)
623
+ policy_cfg.setdefault("step_rewards", dict(DEFAULT_ALIAS_STEP_REWARDS))
624
+ updated_policy = updated_policy.model_copy(update={"config": policy_cfg})
625
+
626
+ ops_override = request.ops
627
+ if not ops_override or len(ops_override) < len(DEFAULT_ALIAS_OPS):
628
+ ops_override = list(DEFAULT_ALIAS_OPS)
629
+
630
+ coerced = request.model_copy(update={"env": updated_env, "policy": updated_policy, "ops": ops_override})
631
+
632
+ with suppress(Exception):
633
+ print(
634
+ "[rollout] remapped math request -> crafter "
635
+ f"(env={request.env.env_name!r}→{coerced.env.env_name!r}, "
636
+ f"policy={request.policy.policy_name!r}→{coerced.policy.policy_name!r})",
637
+ flush=True,
638
+ )
639
+ with suppress(Exception):
640
+ logger.info(
641
+ "ROLLOUT_ALIAS: remapped math env/policy to crafter (env=%s→%s, policy=%s→%s)",
642
+ request.env.env_name,
643
+ coerced.env.env_name,
644
+ request.policy.policy_name,
645
+ coerced.policy.policy_name,
646
+ )
647
+
648
+ return coerced
649
+
650
+
651
def _resolve_trace_correlation_id(policy_cfg: dict[str, Any], mode: Any = None) -> str | None:
    """Best-effort extraction of the trace correlation identifier.

    The first non-blank string found under ``"trace_correlation_id"`` or
    ``"trace"`` in ``policy_cfg`` is returned, stripped. Otherwise the result
    of ``extract_trace_correlation_id`` applied to the config's
    ``"inference_url"`` (with ``mode``) is returned.
    """
    candidates: list[Any] = [policy_cfg.get(key) for key in ("trace_correlation_id", "trace")]
    logger.debug(
        "_resolve_trace_correlation_id: inspecting policy_cfg keys=%s candidates=%s",
        sorted(policy_cfg.keys()),
        candidates,
    )
    explicit = next(
        (
            candidate.strip()
            for candidate in candidates
            if isinstance(candidate, str) and candidate.strip()
        ),
        None,
    )
    if explicit is not None:
        return explicit

    return extract_trace_correlation_id(policy_cfg.get("inference_url"), mode=mode)
669
+
670
+
671
async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
    """Execute a rollout through the legacy hosted executor and normalise its response.

    Steps, in order:
      1. Remap legacy "math" aliases and force structured trace recording.
      2. Normalise the policy's ``inference_url`` and resolve the trace
         correlation id (required in RL mode).
      3. When the hosted service is not bundled, return an empty stub response.
      4. Apply policy/env defaults, convert and cap the ops, and delegate to
         ``legacy_execute_rollout``.
      5. Backfill the trace from the session tracer if the legacy response
         lacks one, and stamp the correlation id on the response, its pipeline
         metadata and every trajectory.

    Args:
        request: The incoming rollout request.
        fastapi_request: The FastAPI request; ``app.state`` is consulted for a
            ``session_tracer_factory`` when a trace must be backfilled.

    Returns:
        A validated ``RolloutResponse``.

    Raises:
        AssertionError: In RL mode, when no non-empty trace correlation id
            could be resolved.
        HTTPException: With status 500 when no trace payload is available.
    """
    request = _coerce_math_to_crafter(request)

    record_cfg = request.record.model_copy(
        update={
            "return_trace": True,
            "trace_format": "structured",
        }
    )
    request = request.model_copy(update={"record": record_cfg})

    policy_cfg = dict(request.policy.config or {})
    logger.info(
        "ROLLOUT_EXEC: incoming policy config keys=%s inference_url=%s run_id=%s mode=%s",
        sorted(policy_cfg.keys()),
        policy_cfg.get("inference_url"),
        request.run_id,
        request.mode,
    )
    inferred_url = ensure_chat_completions_url(policy_cfg.get("inference_url"), mode=request.mode)
    if isinstance(inferred_url, str) and inferred_url:
        if inferred_url != policy_cfg.get("inference_url"):
            logger.warning(
                "ROLLOUT_EXEC: normalized inference_url run_id=%s from %s to %s",
                request.run_id,
                policy_cfg.get("inference_url"),
                inferred_url,
            )
        policy_cfg["inference_url"] = inferred_url
    else:
        logger.warning(
            "ROLLOUT_EXEC: inference_url missing or not normalized run_id=%s raw=%s",
            request.run_id,
            policy_cfg.get("inference_url"),
        )

    trace_correlation_id = _resolve_trace_correlation_id(policy_cfg, mode=request.mode)

    # trace_correlation_id MUST be present for RL mode (but not EVAL mode).
    # Raised explicitly rather than via `assert` so the check still runs when
    # Python is started with -O; the exception type is kept for callers.
    if request.mode == RolloutMode.RL:
        if trace_correlation_id is None:
            raise AssertionError(
                f"FATAL: trace_correlation_id extraction failed for run_id={request.run_id}. "
                f"policy_cfg_keys={sorted(policy_cfg.keys())} "
                f"inference_url={policy_cfg.get('inference_url')}"
            )
        if not (isinstance(trace_correlation_id, str) and trace_correlation_id.strip()):
            raise AssertionError(
                f"FATAL: trace_correlation_id is empty for run_id={request.run_id}. "
                f"Got: {trace_correlation_id!r}"
            )

    if trace_correlation_id:
        policy_cfg["trace_correlation_id"] = trace_correlation_id
    logger.info(
        "ROLLOUT_EXEC: resolved trace_correlation_id=%s run_id=%s",
        trace_correlation_id,
        request.run_id,
    )

    pipeline_metadata: dict[str, Any] = {}
    if trace_correlation_id:
        pipeline_metadata["trace_correlation_id"] = trace_correlation_id
    if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
        pipeline_metadata.setdefault("inference_url", policy_cfg["inference_url"])
    logger.info(
        "ROLLOUT_EXEC: pipeline metadata prepared run_id=%s metadata=%s",
        request.run_id,
        pipeline_metadata,
    )

    # If hosted env service code is not bundled, return a no-op rollout response compatible with contracts
    if not HAS_HOSTED:
        logger.warning(
            "ROLLOUT_EXEC: HAS_HOSTED disabled, returning stub response run_id=%s metadata=%s",
            request.run_id,
            pipeline_metadata,
        )
        return RolloutResponse(
            run_id=request.run_id,
            trajectories=[],
            branches={},
            metrics=RolloutMetrics(
                episode_returns=[],
                mean_return=0.0,
                num_steps=0,
                num_episodes=0,
                details={},
            ),
            aborted=False,
            ops_executed=0,
            trace=None,
            trace_correlation_id=trace_correlation_id or f"trace_{request.run_id}",
            pipeline_metadata=pipeline_metadata,
        )

    # Policy defaults: values already present in the config are never overridden.
    try:
        max_llm_calls = int(policy_cfg.get("max_llm_calls") or 10)
    except Exception:
        max_llm_calls = 10
    policy_cfg.setdefault("max_llm_calls", max_llm_calls)
    policy_cfg.setdefault("max_tokens", 512)
    policy_cfg.setdefault("max_completion_tokens", 512)
    policy_cfg.setdefault("temperature", 0.2)
    policy_cfg.setdefault("top_p", 0.95)

    # The episode must allow at least as many steps as there are LLM calls.
    env_cfg = dict(request.env.config or {})
    env_params = dict(env_cfg.get("env_params") or {})
    try:
        max_steps_episode = int(env_params.get("max_steps_per_episode") or max_llm_calls)
    except Exception:
        max_steps_episode = max_llm_calls
    desired_steps = max(max_llm_calls, max_steps_episode)
    env_params["max_steps_per_episode"] = int(desired_steps)
    env_cfg["env_params"] = env_params

    updated_policy = request.policy.model_copy(update={"config": policy_cfg})
    updated_env = request.env.model_copy(update={"config": env_cfg})
    request = request.model_copy(update={"policy": updated_policy, "env": updated_env})

    # Cap the op list at two ops per allowed LLM call.
    converted_ops: list[str] = [_normalise_op(op, idx) for idx, op in enumerate(request.ops)]
    max_ops_allowed = max_llm_calls * 2 if max_llm_calls > 0 else len(converted_ops)
    if max_ops_allowed and len(converted_ops) > max_ops_allowed:
        converted_ops = converted_ops[:max_ops_allowed]
    legacy_request = LegacyRolloutRequest(
        run_id=request.run_id,
        mode=request.mode,  # Preserve mode for nested requests
        env=LegacyRolloutEnvSpec(
            env_id=request.env.env_id,
            env_name=request.env.env_name,
            config=env_cfg,
            seed=request.env.seed,
        ),
        policy=LegacyRolloutPolicySpec(
            policy_id=request.policy.policy_id,
            policy_name=request.policy.policy_name,
            config=policy_cfg,
        ),
        ops=converted_ops,
        record=LegacyRolloutRecordConfig(**request.record.model_dump()),
        on_done=request.on_done,
        branch=None,
        safety=LegacyRolloutSafetyConfig(**request.safety.model_dump()),
        training_session_id=request.training_session_id,
        synth_base_url=request.synth_base_url,
    )

    legacy_response: LegacyRolloutResponse = await legacy_execute_rollout(
        legacy_request, fastapi_request
    )
    logger.info(
        "ROLLOUT_EXEC: legacy rollout completed run_id=%s trace_id=%s",
        request.run_id,
        trace_correlation_id,
    )
    data = legacy_response.model_dump()
    legacy_trace = getattr(legacy_response, "trace", None)
    if legacy_trace is not None:
        if isinstance(legacy_trace, dict):
            legacy_trace_preview = list(legacy_trace.keys())[:5]
        else:
            legacy_trace_preview = type(legacy_trace)
        logger.info(
            "ROLLOUT_EXEC: legacy response trace present type=%s preview=%s",
            type(legacy_trace),
            legacy_trace_preview,
        )
    logger.debug(
        "ROLLOUT_EXEC: legacy response keys=%s has_trace=%s",
        sorted(data.keys()),
        bool(data.get("trace")),
    )
    metrics = data.get("metrics", {}) or {}
    metrics.setdefault("outcome_score", None)
    metrics.setdefault("events_score", None)
    metrics.setdefault("details", {})
    data["metrics"] = metrics

    if data.get("trace") is None:
        if legacy_trace is not None:
            # The dump lost the trace but the response object still has it.
            data["trace"] = legacy_trace
        else:
            # Last resort: fetch the trace for this run from the session tracer.
            tracer_factory = getattr(fastapi_request.app.state, "session_tracer_factory", None)
            if callable(tracer_factory):
                tracer = tracer_factory()
                logger.debug("ROLLOUT_EXEC: trace backfill factory=%s", type(tracer))
                if isinstance(tracer, SessionTracer):
                    try:
                        await tracer.initialize()
                        if tracer.db is not None:
                            trace_row = await tracer.db.get_session_trace(request.run_id)
                            if trace_row is not None:
                                data["trace"] = trace_row
                    except Exception as exc:
                        logger.warning("TRACE_BACKFILL_FAIL: %s", exc)
                    finally:
                        with suppress(Exception):
                            await tracer.close()

    # ALWAYS set trace_correlation_id (use fallback if needed)
    final_cid = trace_correlation_id or f"trace_{request.run_id}"

    # Add trace_correlation_id at TOP-LEVEL (REQUIRED for RL training pipeline)
    data["trace_correlation_id"] = final_cid

    # Add trace_correlation_id to pipeline_metadata
    existing_meta = data.get("pipeline_metadata")
    if not isinstance(existing_meta, dict):
        existing_meta = {}
    existing_meta["trace_correlation_id"] = final_cid
    if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
        existing_meta.setdefault("inference_url", policy_cfg["inference_url"])
    data["pipeline_metadata"] = existing_meta

    # Add trace_correlation_id to each trajectory (required for RL training pipeline)
    if "trajectories" in data:
        fallback_url = policy_cfg.get("inference_url")
        normalized_trajs: list[dict[str, Any]] = []
        for traj in data.get("trajectories", []):
            if isinstance(traj, BaseModel):
                traj_dict = traj.model_dump()
            elif isinstance(traj, dict):
                traj_dict = dict(traj)
            else:
                continue
            traj_dict["trace_correlation_id"] = final_cid
            # Fill in the policy's inference_url only when the trajectory has none.
            if not traj_dict.get("inference_url") and fallback_url:
                traj_dict["inference_url"] = fallback_url
            normalized_trajs.append(traj_dict)
        if normalized_trajs:
            data["trajectories"] = normalized_trajs
        logger.info(
            "ROLLOUT_EXEC: normalized trajectory sample run_id=%s inference_url=%s",
            request.run_id,
            normalized_trajs[0].get("inference_url") if normalized_trajs else None,
        )
    logger.info(
        "ROLLOUT_EXEC: final pipeline metadata run_id=%s metadata=%s",
        request.run_id,
        existing_meta,
    )
    if trace_correlation_id and existing_meta.get("trace_correlation_id") != trace_correlation_id:
        logger.error(
            "ROLLOUT_EXEC: metadata trace mismatch run_id=%s expected=%s actual=%s",
            request.run_id,
            trace_correlation_id,
            existing_meta.get("trace_correlation_id"),
        )
    if not existing_meta.get("trace_correlation_id"):
        logger.error(
            "ROLLOUT_EXEC: final metadata missing trace_correlation_id run_id=%s metadata=%s",
            request.run_id,
            existing_meta,
        )

    if data.get("trace") is None:
        raise HTTPException(
            status_code=500,
            detail="trace_payload_missing: task app did not emit a SessionTrace",
        )

    # Internal invariants: everything checked below was set a few lines above,
    # so these asserts guard against future edits rather than validate input.
    assert "trace_correlation_id" in data, (
        f"FATAL: trace_correlation_id missing from top-level response data for run_id={request.run_id}. "
        f"Keys: {list(data.keys())}"
    )
    assert data["trace_correlation_id"] == final_cid, (
        f"FATAL: trace_correlation_id mismatch in response for run_id={request.run_id}. "
        f"Expected: {final_cid!r}, Got: {data.get('trace_correlation_id')!r}"
    )
    assert "pipeline_metadata" in data, (
        f"FATAL: pipeline_metadata missing from response for run_id={request.run_id}"
    )
    assert data["pipeline_metadata"].get("trace_correlation_id") == final_cid, (
        f"FATAL: trace_correlation_id missing or mismatched in pipeline_metadata for run_id={request.run_id}. "
        f"Expected: {final_cid!r}, Got: {data['pipeline_metadata'].get('trace_correlation_id')!r}"
    )
    logger.info(
        "ROLLOUT_EXEC: assertions passed - trace_correlation_id present in response run_id=%s cid=%s",
        request.run_id,
        final_cid,
    )

    return RolloutResponse.model_validate(data)
955
+
956
+
957
def build_config() -> TaskAppConfig:
    """Assemble the ``TaskAppConfig`` for the GRPO Crafter task app.

    Builds the dataset and base task info, sets up tracing and the optional
    SFT output directory (printing their status), and wires the rollout
    executor, rubrics, proxy settings and - when the hosted service is
    available - its routers into the returned config.
    """
    registry, dataset = build_dataset()
    base_info = _base_task_info(dataset)

    hosted_task_app = HostedTaskApp() if HAS_HOSTED else None

    tracing_enabled = tracing_env_enabled()
    tracing_db_url = resolve_tracing_db_url()
    tracer_factory = build_tracer_factory(
        SessionTracer, enabled=tracing_enabled, db_url=tracing_db_url
    )
    sft_output_dir = resolve_sft_output_dir()

    # Optional entries are added to the app state only when they are available.
    app_state: dict[str, Any] = {
        "task_app": hosted_task_app,
        "allowed_environments": ["crafter"],
        "tracing_enabled": tracing_enabled,
    }
    if tracer_factory is not None:
        app_state["session_tracer_factory"] = tracer_factory
    if sft_output_dir:
        app_state["sft_output_dir"] = sft_output_dir

    status_msg = (
        f"[task:tracing] enabled (db={tracing_db_url or 'default'})"
        if tracing_enabled
        else "[task:tracing] disabled"
    )
    print(status_msg, flush=True)
    if sft_output_dir:
        print(f"[task:sft] writing JSONL to {sft_output_dir}", flush=True)

    def _describe_taskset() -> dict[str, Any]:
        return describe_taskset(dataset)

    def _provide_instances(seeds: Sequence[int]):
        return provide_task_instances(dataset, base_info, seeds)

    # The hosted routers only exist when synth_envs_hosted was imported.
    if HAS_HOSTED:
        routers: tuple = (environment_router, policy_router, branching_router)
    else:
        routers = ()

    return TaskAppConfig(
        app_id="grpo-crafter-task-app",
        name="GRPO Crafter Task App",
        description="Crafter Classic environment with GRPO task endpoints and LLM proxies.",
        base_task_info=base_info,
        describe_taskset=_describe_taskset,
        provide_task_instances=_provide_instances,
        rollout=rollout_executor,
        dataset_registry=registry,
        rubrics=RubricBundle(outcome=OUTCOME_RUBRIC, events=EVENTS_RUBRIC),
        proxy=ProxyConfig(
            enable_openai=True, enable_groq=True, system_hint=CRAFTING_RULES_SYSTEM_HINT
        ),
        routers=routers,
        app_state=app_state,
        cors_origins=["*"],
    )
1014
+
1015
+
1016
# Register the app under its id and aliases. The config is built lazily through
# build_config; the Modal section lists the packages, mounts, secrets and
# resources passed to ModalDeploymentConfig.
register_task_app(
    entry=TaskAppEntry(
        app_id="grpo-crafter-task-app",
        description="Crafter Classic task app with rollout + proxy endpoints",
        config_factory=build_config,
        aliases=("crafter", "crafter-task"),
        env_files=(str(REPO_ROOT / "backend" / ".env.dev"),),
        modal=ModalDeploymentConfig(
            app_name="grpo-crafter-task-app",
            python_version="3.11",
            pip_packages=(
                # Web stack and core runtime deps
                "fastapi>=0.100.0",
                "uvicorn>=0.23.0",
                "pydantic>=2.0.0",
                "numpy>=1.24.0",
                "aiohttp>=3.8.0",
                "httpx>=0.24.0",
                "python-dotenv>=1.0.1",
                # Tracing/DB runtime deps
                "sqlalchemy>=2.0.42",
                "aiosqlite>=0.21.0",
                "greenlet>=3.2.3",
                # Environment
                "crafter",
            ),
            extra_local_dirs=(
                # Mount repo root so local modules resolve when deployed on Modal
                (str(REPO_ROOT), "/opt/synth_ai_repo"),
                (str(REPO_ROOT / "synth_ai"), "/opt/synth_ai_repo/synth_ai"),
                (str(TASK_APP_ROOT), "/opt/synth_ai_repo/examples/task_apps/crafter/task_app"),
                # Explicitly mount rubrics directory
                (str(RUBRICS_ROOT), "/opt/synth_ai_repo/examples/multi_step/rubrics"),
            ),
            secret_names=("groq-api-key", "openai-api-key"),
            env_vars={"SERVICE": "MODAL"},
            memory=16384,
            cpu=4.0,
            max_containers=10,
        ),
    )
)