synth-ai 0.2.9.dev0__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (890) hide show
  1. examples/README.md +1 -0
  2. examples/__init__.py +16 -0
  3. examples/analyze_semantic_words.sh +17 -0
  4. examples/baseline/banking77_baseline.py +243 -0
  5. examples/baseline/banking77_pipeline_baseline.py +294 -0
  6. examples/baseline/crafter_baseline.py +407 -0
  7. examples/baseline/pokemon_red_baseline.py +326 -0
  8. examples/baseline/simple_baseline.py +56 -0
  9. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  10. examples/blog_posts/gepa/README.md +355 -0
  11. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  12. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
  13. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
  14. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
  15. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
  16. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
  17. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
  18. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
  19. examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
  20. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
  21. examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
  22. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
  23. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
  24. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
  25. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
  26. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
  27. examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
  28. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  29. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  30. examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
  31. examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
  32. examples/blog_posts/gepa/task_apps.py +105 -0
  33. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  34. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  35. examples/blog_posts/mipro/README.md +415 -0
  36. examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
  37. examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
  38. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
  39. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
  40. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
  41. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
  42. examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
  43. examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
  44. examples/blog_posts/mipro/multi_step.md +79 -0
  45. examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
  46. examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
  47. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
  48. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
  49. examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
  50. examples/blog_posts/pokemon_vl/README.md +98 -0
  51. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  52. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  53. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  54. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  55. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  56. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  57. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  58. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  59. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  60. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  61. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  62. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  63. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  64. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  65. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  66. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  67. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  68. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  69. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  70. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  71. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  72. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  73. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  74. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  75. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  76. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  77. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  78. examples/crafter_debug_render.py +186 -0
  79. examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
  80. examples/gepa/banking77_pipeline_gepa.toml +96 -0
  81. examples/gepa/multi_stage_gepa_example.toml +84 -0
  82. examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
  83. examples/multi_step/SFT_README.md +147 -0
  84. examples/multi_step/configs/README_verilog_rl.md +77 -0
  85. examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
  86. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
  87. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  88. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  89. examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
  90. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
  91. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
  92. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
  93. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  94. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  95. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  96. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  97. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  98. examples/multi_step/configs/verilog_rl_lora.toml +147 -0
  99. examples/multi_step/convert_traces_to_sft.py +84 -0
  100. examples/multi_step/crafter_rl_lora.md +70 -0
  101. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  102. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  103. examples/multi_step/readme.md +48 -0
  104. examples/multi_step/run_sft_qwen30b.sh +45 -0
  105. examples/multi_step/sse_metrics_streaming_notes.md +357 -0
  106. examples/multi_step/task_app_config_notes.md +494 -0
  107. examples/multi_step/verilog_rl_lora.md +218 -0
  108. examples/qwen_coder/README.md +102 -0
  109. examples/qwen_coder/_shared.py +113 -0
  110. examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
  111. examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
  112. examples/qwen_coder/configs/coder_lora_small.toml +57 -0
  113. examples/qwen_coder/generate_dataset.py +98 -0
  114. examples/qwen_coder/infer_ft_smoke.py +65 -0
  115. examples/qwen_coder/infer_prod_proxy.py +73 -0
  116. examples/qwen_coder/infer_via_synth.py +87 -0
  117. examples/qwen_coder/scripts/infer_coder.sh +19 -0
  118. examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
  119. examples/qwen_coder/sft_full_17b.py +103 -0
  120. examples/qwen_coder/sft_lora_30b.py +110 -0
  121. examples/qwen_coder/subset_jsonl.py +39 -0
  122. examples/qwen_coder/todos.md +38 -0
  123. examples/qwen_coder/validate_jsonl.py +60 -0
  124. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  125. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  126. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  127. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  128. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  129. examples/qwen_vl/QUICKSTART.md +327 -0
  130. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  131. examples/qwen_vl/README.md +152 -0
  132. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  133. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  134. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  135. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  136. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  137. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  138. examples/qwen_vl/__init__.py +2 -0
  139. examples/qwen_vl/collect_data_via_cli.md +415 -0
  140. examples/qwen_vl/collect_vision_traces.py +368 -0
  141. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  142. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  143. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  144. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  145. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  146. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  147. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  148. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  149. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  150. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  151. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  152. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  153. examples/qwen_vl/run_vision_comparison.sh +61 -0
  154. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  155. examples/qwen_vl/test_image_validation.py +201 -0
  156. examples/qwen_vl/test_sft_vision_data.py +110 -0
  157. examples/rl/README.md +169 -0
  158. examples/rl/configs/eval_base_qwen.toml +17 -0
  159. examples/rl/configs/eval_rl_qwen.toml +13 -0
  160. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  161. examples/rl/configs/rl_from_base_qwen17.toml +80 -0
  162. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  163. examples/rl/download_dataset.py +80 -0
  164. examples/rl/run_eval.py +436 -0
  165. examples/rl/run_rl_and_save.py +111 -0
  166. examples/rl/task_app/README.md +21 -0
  167. {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +188 -50
  168. examples/rl/task_app/math_task_app.py +111 -0
  169. examples/run_crafter_demo.sh +10 -0
  170. examples/sdk_prompt_learning_example.py +55 -0
  171. examples/sft/README.md +139 -0
  172. examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
  173. examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
  174. examples/sft/evaluate.py +117 -0
  175. examples/sft/export_dataset.py +120 -0
  176. examples/sft/generate_traces.py +164 -0
  177. examples/swe/__init__.py +12 -0
  178. examples/swe/task_app/README.md +135 -0
  179. examples/swe/task_app/__init__.py +2 -0
  180. examples/swe/task_app/grpo_swe_mini.py +604 -0
  181. examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
  182. examples/swe/task_app/hosted/README.md +173 -0
  183. examples/swe/task_app/hosted/__init__.py +5 -0
  184. examples/swe/task_app/hosted/branching.py +143 -0
  185. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  186. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  187. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  188. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  189. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  190. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  191. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  192. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  193. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  194. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  195. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
  196. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  197. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  198. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  199. examples/swe/task_app/hosted/hosted_app.py +204 -0
  200. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  201. examples/swe/task_app/hosted/inference/openai_client.py +584 -0
  202. examples/swe/task_app/hosted/main.py +100 -0
  203. examples/swe/task_app/hosted/policy_routes.py +1094 -0
  204. examples/swe/task_app/hosted/registry.py +195 -0
  205. examples/swe/task_app/hosted/rollout.py +1905 -0
  206. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  207. examples/swe/task_app/hosted/storage/volume.py +211 -0
  208. examples/swe/task_app/hosted/test_agents.py +161 -0
  209. examples/swe/task_app/hosted/test_service.py +136 -0
  210. examples/swe/task_app/hosted/utils.py +62 -0
  211. examples/swe/task_app/morph_backend.py +178 -0
  212. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  213. examples/task_apps/TESTING.md +275 -0
  214. examples/task_apps/banking77/__init__.py +6 -0
  215. examples/task_apps/banking77/banking77_task_app.py +912 -0
  216. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  217. examples/task_apps/banking77_pipeline/__init__.py +6 -0
  218. examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
  219. examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
  220. examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
  221. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  222. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
  223. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
  224. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  225. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  226. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  227. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  228. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  229. examples/task_apps/crafter/task_app/README.md +42 -0
  230. examples/task_apps/crafter/task_app/__init__.py +5 -0
  231. examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
  232. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
  233. examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
  234. examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
  235. examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
  236. examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  237. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  238. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  239. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  240. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
  241. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
  242. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
  243. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  244. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  245. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
  246. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  247. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
  248. examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
  249. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
  250. examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
  251. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
  252. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  253. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
  254. examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
  255. examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
  256. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
  257. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  258. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  259. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  260. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  261. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  262. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  263. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  264. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  265. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  266. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  267. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  268. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  269. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  270. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  271. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  272. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  273. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  274. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  275. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  276. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  277. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  278. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  279. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  280. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  281. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  282. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  283. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  284. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  285. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  286. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  287. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  288. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  289. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  290. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  291. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  292. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  293. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  294. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  295. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  296. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  297. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  298. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  299. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  300. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  301. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  302. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  303. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  304. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  305. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  306. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  307. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  308. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  309. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  310. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  311. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  312. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  313. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  314. examples/task_apps/enron/__init__.py +2 -0
  315. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  316. examples/task_apps/enron/filter_sft.toml +5 -0
  317. examples/task_apps/enron/task_app/README.md +14 -0
  318. examples/task_apps/enron/task_app/__init__.py +1 -0
  319. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  320. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  321. examples/task_apps/enron/tests/__init__.py +4 -0
  322. examples/task_apps/enron/tests/conftest.py +115 -0
  323. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  324. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  325. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  326. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  327. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  328. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  329. examples/task_apps/gepa_benchmarks/common.py +260 -0
  330. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  331. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  332. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  333. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  334. examples/task_apps/math/README.md +21 -0
  335. examples/task_apps/math/math_single_step.py +1000 -0
  336. examples/task_apps/math/math_task_app.py +115 -0
  337. examples/task_apps/pokemon_battle/__init__.py +2 -0
  338. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  339. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  340. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  341. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  342. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  343. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  344. examples/task_apps/pokemon_red/README.md +356 -0
  345. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
  346. examples/task_apps/pokemon_red/__init__.py +3 -0
  347. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
  348. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
  349. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  350. examples/task_apps/pokemon_red/task_app.py +1048 -0
  351. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  352. examples/task_apps/sokoban/README.md +306 -0
  353. examples/task_apps/sokoban/__init__.py +3 -0
  354. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  355. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  356. examples/task_apps/sokoban/filter_sft.toml +5 -0
  357. examples/task_apps/sokoban/task_app.py +1058 -0
  358. examples/task_apps/sokoban/tests/__init__.py +4 -0
  359. examples/task_apps/sokoban/tests/conftest.py +113 -0
  360. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  361. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  362. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  363. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  364. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  365. examples/task_apps/verilog/__init__.py +1 -0
  366. examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
  367. examples/task_apps/verilog/filter_sft.toml +5 -0
  368. examples/task_apps/verilog/task_app/README.md +12 -0
  369. examples/task_apps/verilog/task_app/__init__.py +1 -0
  370. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  371. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  372. examples/task_apps/verilog/tests/__init__.py +4 -0
  373. examples/task_apps/verilog/tests/conftest.py +115 -0
  374. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  375. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  376. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  377. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  378. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  379. examples/tunnel_gepa_banking77/README.md +106 -0
  380. examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
  381. examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
  382. examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
  383. examples/vlm/PROPOSAL.md +53 -0
  384. examples/vlm/README.md +68 -0
  385. examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
  386. examples/vlm/crafter_image_only_agent.py +207 -0
  387. examples/vlm/crafter_openai_vlm_agent.py +275 -0
  388. examples/vlm/filter_image_rows.py +63 -0
  389. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  390. examples/warming_up_to_rl/_utils.py +92 -0
  391. examples/warming_up_to_rl/analyze_trace_db.py +422 -0
  392. examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
  393. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
  394. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
  395. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
  396. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
  397. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
  398. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
  399. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
  400. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
  401. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
  402. examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
  403. examples/warming_up_to_rl/export_trace_sft.py +837 -0
  404. examples/warming_up_to_rl/groq_test.py +97 -0
  405. examples/warming_up_to_rl/manage_secrets.py +131 -0
  406. examples/warming_up_to_rl/old/event_rewards.md +234 -0
  407. examples/warming_up_to_rl/old/notes.md +73 -0
  408. examples/warming_up_to_rl/readme.md +110 -0
  409. examples/warming_up_to_rl/run_eval.py +736 -0
  410. examples/warming_up_to_rl/run_fft_and_save.py +380 -0
  411. examples/warming_up_to_rl/run_local_rollout.py +239 -0
  412. examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
  413. examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
  414. examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
  415. examples/warming_up_to_rl/run_rl_and_save.py +124 -0
  416. examples/warming_up_to_rl/run_rollout_remote.py +156 -0
  417. examples/warming_up_to_rl/task_app/README.md +42 -0
  418. examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
  419. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  420. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  421. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  422. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  423. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  424. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  425. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  426. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  427. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  428. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  429. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  430. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  431. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  432. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
  433. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  434. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
  435. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  436. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
  437. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  438. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
  439. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  440. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  441. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  442. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  443. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
  444. examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
  445. examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
  446. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
  447. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
  448. examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
  449. examples/workflows/math_rl/download_dataset.py +80 -0
  450. examples/workflows/math_rl/run_eval.py +436 -0
  451. examples/workflows/math_rl/run_rl_and_save.py +111 -0
  452. synth_ai/__init__.py +47 -23
  453. synth_ai/_utils/__init__.py +47 -0
  454. synth_ai/_utils/base_url.py +10 -0
  455. synth_ai/_utils/http.py +10 -0
  456. synth_ai/_utils/prompts.py +10 -0
  457. synth_ai/_utils/task_app_state.py +12 -0
  458. synth_ai/_utils/user_config.py +10 -0
  459. synth_ai/api/models/supported.py +514 -0
  460. synth_ai/api/train/__init__.py +60 -2
  461. synth_ai/api/train/builders.py +347 -39
  462. synth_ai/api/train/cli.py +895 -160
  463. synth_ai/api/train/config_finder.py +103 -25
  464. synth_ai/api/train/configs/__init__.py +65 -0
  465. synth_ai/api/train/configs/prompt_learning.py +496 -0
  466. synth_ai/api/train/configs/rl.py +188 -0
  467. synth_ai/api/train/configs/sft.py +99 -0
  468. synth_ai/api/train/configs/shared.py +81 -0
  469. synth_ai/api/train/env_resolver.py +70 -20
  470. synth_ai/api/train/pollers.py +29 -4
  471. synth_ai/api/train/prompt_learning.py +425 -0
  472. synth_ai/api/train/sft.py +390 -0
  473. synth_ai/api/train/supported_algos.py +147 -0
  474. synth_ai/api/train/task_app.py +6 -4
  475. synth_ai/api/train/utils.py +64 -52
  476. synth_ai/api/train/validators.py +1117 -0
  477. synth_ai/api/tunnel.py +49 -0
  478. synth_ai/auth/credentials.py +94 -0
  479. synth_ai/baseline/__init__.py +25 -0
  480. synth_ai/baseline/config.py +209 -0
  481. synth_ai/baseline/discovery.py +214 -0
  482. synth_ai/baseline/execution.py +146 -0
  483. synth_ai/cfgs.py +227 -0
  484. synth_ai/cli/__init__.py +85 -63
  485. synth_ai/cli/_modal_wrapper.py +31 -0
  486. synth_ai/cli/_storage.py +20 -0
  487. synth_ai/cli/_typer_patch.py +47 -0
  488. synth_ai/cli/_validate_task_app.py +29 -0
  489. synth_ai/cli/balance.py +16 -4
  490. synth_ai/cli/calc.py +36 -21
  491. synth_ai/cli/claude.py +70 -0
  492. synth_ai/cli/codex.py +267 -0
  493. synth_ai/cli/commands/__init__.py +18 -0
  494. synth_ai/cli/commands/baseline/__init__.py +12 -0
  495. synth_ai/cli/commands/baseline/core.py +637 -0
  496. synth_ai/cli/commands/baseline/list.py +93 -0
  497. synth_ai/cli/commands/demo/__init__.py +6 -0
  498. synth_ai/cli/commands/demo/core.py +163 -0
  499. synth_ai/cli/commands/eval/__init__.py +19 -0
  500. synth_ai/cli/commands/eval/core.py +1112 -0
  501. synth_ai/cli/commands/eval/errors.py +81 -0
  502. synth_ai/cli/commands/eval/validation.py +133 -0
  503. synth_ai/cli/commands/filter/__init__.py +12 -0
  504. synth_ai/cli/commands/filter/core.py +424 -0
  505. synth_ai/cli/commands/filter/errors.py +55 -0
  506. synth_ai/cli/commands/filter/validation.py +77 -0
  507. synth_ai/cli/commands/help/__init__.py +185 -0
  508. synth_ai/cli/commands/help/core.py +72 -0
  509. synth_ai/cli/commands/smoke/__init__.py +7 -0
  510. synth_ai/cli/commands/smoke/core.py +1437 -0
  511. synth_ai/cli/commands/status/__init__.py +66 -0
  512. synth_ai/cli/commands/status/client.py +192 -0
  513. synth_ai/cli/commands/status/config.py +92 -0
  514. synth_ai/cli/commands/status/errors.py +20 -0
  515. synth_ai/cli/commands/status/formatters.py +164 -0
  516. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  517. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  518. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  519. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  520. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  521. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  522. synth_ai/cli/commands/status/subcommands/session.py +183 -0
  523. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  524. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  525. synth_ai/cli/commands/status/utils.py +114 -0
  526. synth_ai/cli/commands/train/__init__.py +53 -0
  527. synth_ai/cli/commands/train/core.py +21 -0
  528. synth_ai/cli/commands/train/errors.py +117 -0
  529. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  530. synth_ai/cli/commands/train/judge_validation.py +305 -0
  531. synth_ai/cli/commands/train/validation.py +386 -0
  532. synth_ai/cli/demo.py +32 -140
  533. synth_ai/cli/deploy.py +233 -0
  534. synth_ai/cli/eval/__init__.py +36 -0
  535. synth_ai/cli/eval/core.py +5 -0
  536. synth_ai/cli/eval/errors.py +31 -0
  537. synth_ai/cli/eval/validation.py +5 -0
  538. synth_ai/cli/filter/__init__.py +28 -0
  539. synth_ai/cli/filter/core.py +5 -0
  540. synth_ai/cli/filter/errors.py +23 -0
  541. synth_ai/cli/filter/validation.py +5 -0
  542. synth_ai/cli/legacy_root_backup.py +28 -22
  543. synth_ai/cli/lib/__init__.py +10 -0
  544. synth_ai/cli/lib/task_app_discovery.py +7 -0
  545. synth_ai/cli/lib/task_app_env.py +518 -0
  546. synth_ai/cli/mcp.py +34 -0
  547. synth_ai/cli/modal_serve/__init__.py +12 -0
  548. synth_ai/cli/modal_serve/core.py +14 -0
  549. synth_ai/cli/modal_serve/errors.py +8 -0
  550. synth_ai/cli/modal_serve/validation.py +11 -0
  551. synth_ai/cli/opencode.py +256 -0
  552. synth_ai/cli/recent.py +13 -7
  553. synth_ai/cli/rl_demo.py +156 -116
  554. synth_ai/cli/root.py +131 -132
  555. synth_ai/cli/serve/__init__.py +12 -0
  556. synth_ai/cli/serve/core.py +14 -0
  557. synth_ai/cli/serve/errors.py +8 -0
  558. synth_ai/cli/serve/validation.py +11 -0
  559. synth_ai/cli/setup.py +49 -0
  560. synth_ai/cli/status.py +7 -125
  561. synth_ai/cli/task_app_deploy.py +7 -0
  562. synth_ai/cli/task_app_list.py +25 -0
  563. synth_ai/cli/task_app_modal_serve.py +11 -0
  564. synth_ai/cli/task_app_serve.py +11 -0
  565. synth_ai/cli/task_apps.py +2284 -257
  566. synth_ai/cli/traces.py +9 -5
  567. synth_ai/cli/train/__init__.py +12 -0
  568. synth_ai/cli/train/core.py +21 -0
  569. synth_ai/cli/train/errors.py +8 -0
  570. synth_ai/cli/train/validation.py +24 -0
  571. synth_ai/cli/train.py +5 -0
  572. synth_ai/cli/turso.py +73 -0
  573. synth_ai/cli/watch.py +13 -18
  574. synth_ai/demos/__init__.py +10 -0
  575. synth_ai/demos/core/__init__.py +28 -1
  576. synth_ai/demos/core/cli.py +579 -291
  577. synth_ai/demos/crafter/__init__.py +1 -0
  578. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  579. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  580. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  581. synth_ai/demos/demo_registry.py +176 -0
  582. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  583. synth_ai/demos/demo_task_apps/core.py +64 -28
  584. synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
  585. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
  586. synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
  587. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
  588. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  589. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  590. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  591. synth_ai/demos/demo_task_apps/math/modal_task_app.py +185 -83
  592. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
  593. synth_ai/demos/math/__init__.py +1 -0
  594. synth_ai/demos/math/_common.py +16 -0
  595. synth_ai/demos/math/app.py +38 -0
  596. synth_ai/demos/math/config.toml +76 -0
  597. synth_ai/demos/math/deploy_modal.py +54 -0
  598. synth_ai/demos/math/modal_task_app.py +703 -0
  599. synth_ai/demos/math/task_app_entry.py +51 -0
  600. synth_ai/environments/environment/core.py +7 -1
  601. synth_ai/environments/examples/bandit/engine.py +12 -5
  602. synth_ai/environments/examples/bandit/environment.py +0 -1
  603. synth_ai/environments/examples/bandit/taskset.py +4 -4
  604. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  605. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  606. synth_ai/environments/examples/crafter_classic/environment.py +93 -2
  607. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  608. synth_ai/environments/examples/enron/engine.py +7 -2
  609. synth_ai/environments/examples/enron/environment.py +68 -0
  610. synth_ai/environments/examples/red/engine.py +60 -12
  611. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  612. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  613. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  614. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  615. synth_ai/environments/examples/red/environment.py +86 -0
  616. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  617. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  618. synth_ai/environments/examples/verilog/engine.py +104 -12
  619. synth_ai/environments/examples/wordle/environment.py +0 -1
  620. synth_ai/environments/reproducibility/tree.py +5 -6
  621. synth_ai/environments/service/app.py +11 -12
  622. synth_ai/environments/service/core_routes.py +10 -9
  623. synth_ai/environments/stateful/engine.py +1 -1
  624. synth_ai/environments/tasks/core.py +1 -0
  625. synth_ai/environments/tasks/filters.py +5 -6
  626. synth_ai/environments/tasks/utils.py +4 -5
  627. synth_ai/evals/__init__.py +15 -0
  628. synth_ai/evals/base.py +14 -5
  629. synth_ai/evals/client.py +82 -0
  630. synth_ai/evals/types.py +42 -0
  631. synth_ai/http.py +8 -22
  632. synth_ai/http_client.py +45 -12
  633. synth_ai/inference/__init__.py +0 -2
  634. synth_ai/inference/client.py +21 -7
  635. synth_ai/jobs/client.py +129 -80
  636. synth_ai/judge_schemas.py +127 -0
  637. synth_ai/learning/__init__.py +51 -6
  638. synth_ai/learning/algorithms.py +14 -0
  639. synth_ai/learning/client.py +122 -30
  640. synth_ai/learning/config.py +2 -40
  641. synth_ai/learning/constants.py +0 -2
  642. synth_ai/learning/ft_client.py +4 -56
  643. synth_ai/learning/health.py +14 -8
  644. synth_ai/learning/jobs.py +43 -47
  645. synth_ai/learning/prompt_learning_client.py +276 -0
  646. synth_ai/learning/prompt_learning_types.py +185 -0
  647. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  648. synth_ai/learning/rl/client.py +269 -0
  649. synth_ai/learning/rl/config.py +31 -0
  650. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  651. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  652. synth_ai/learning/rl/secrets.py +13 -0
  653. synth_ai/learning/rl_client.py +2 -253
  654. synth_ai/learning/sft/__init__.py +29 -0
  655. synth_ai/learning/sft/client.py +68 -0
  656. synth_ai/learning/sft/config.py +270 -0
  657. synth_ai/learning/sft/data.py +698 -0
  658. synth_ai/learning/sse.py +25 -26
  659. synth_ai/learning/validators.py +29 -25
  660. synth_ai/mcp/__init__.py +5 -0
  661. synth_ai/mcp/__main__.py +8 -0
  662. synth_ai/mcp/main.py +254 -0
  663. synth_ai/mcp/setup.py +100 -0
  664. synth_ai/modal.py +257 -0
  665. synth_ai/pricing/__init__.py +3 -0
  666. synth_ai/pricing/model_pricing.py +64 -0
  667. synth_ai/session/__init__.py +75 -0
  668. synth_ai/session/client.py +383 -0
  669. synth_ai/session/constants.py +63 -0
  670. synth_ai/session/exceptions.py +105 -0
  671. synth_ai/session/manager.py +139 -0
  672. synth_ai/session/models.py +89 -0
  673. synth_ai/session/query.py +110 -0
  674. synth_ai/spec/__init__.py +46 -0
  675. synth_ai/spec/dataclasses.py +149 -0
  676. synth_ai/spec/loader.py +144 -0
  677. synth_ai/spec/serializer.py +199 -0
  678. synth_ai/spec/validation.py +250 -0
  679. synth_ai/streaming/__init__.py +29 -0
  680. synth_ai/streaming/config.py +94 -0
  681. synth_ai/streaming/handlers.py +589 -0
  682. synth_ai/streaming/streamer.py +320 -0
  683. synth_ai/streaming/types.py +95 -0
  684. synth_ai/task/__init__.py +50 -30
  685. synth_ai/task/apps/__init__.py +63 -19
  686. synth_ai/task/auth.py +35 -23
  687. synth_ai/task/client.py +15 -13
  688. synth_ai/task/config.py +261 -0
  689. synth_ai/task/contracts.py +165 -64
  690. synth_ai/task/datasets.py +9 -6
  691. synth_ai/task/errors.py +11 -10
  692. synth_ai/task/health.py +17 -11
  693. synth_ai/task/inference_api.py +101 -0
  694. synth_ai/task/json.py +58 -24
  695. synth_ai/task/proxy.py +59 -66
  696. synth_ai/task/rubrics/__init__.py +55 -0
  697. synth_ai/task/rubrics/loaders.py +156 -0
  698. synth_ai/task/rubrics/models.py +57 -0
  699. synth_ai/task/rubrics/scoring.py +116 -0
  700. synth_ai/task/rubrics/strict.py +149 -0
  701. synth_ai/task/rubrics.py +22 -15
  702. synth_ai/task/server.py +65 -31
  703. synth_ai/task/trace_correlation_helpers.py +328 -0
  704. synth_ai/task/tracing_utils.py +44 -28
  705. synth_ai/task/validators.py +449 -6
  706. synth_ai/task/vendors.py +5 -7
  707. synth_ai/tracing_v3/__init__.py +4 -0
  708. synth_ai/tracing_v3/abstractions.py +21 -4
  709. synth_ai/tracing_v3/config.py +167 -22
  710. synth_ai/tracing_v3/constants.py +21 -0
  711. synth_ai/tracing_v3/db_config.py +42 -29
  712. synth_ai/tracing_v3/decorators.py +80 -45
  713. synth_ai/tracing_v3/examples/basic_usage.py +15 -9
  714. synth_ai/tracing_v3/hooks.py +6 -4
  715. synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
  716. synth_ai/tracing_v3/migration_helper.py +1 -2
  717. synth_ai/tracing_v3/replica_sync.py +12 -7
  718. synth_ai/tracing_v3/serialization.py +130 -0
  719. synth_ai/tracing_v3/session_tracer.py +73 -16
  720. synth_ai/tracing_v3/storage/base.py +89 -1
  721. synth_ai/tracing_v3/storage/config.py +63 -16
  722. synth_ai/tracing_v3/storage/factory.py +11 -9
  723. synth_ai/tracing_v3/storage/utils.py +15 -11
  724. synth_ai/tracing_v3/trace_utils.py +317 -0
  725. synth_ai/tracing_v3/turso/__init__.py +8 -21
  726. synth_ai/tracing_v3/turso/daemon.py +123 -15
  727. synth_ai/tracing_v3/turso/models.py +5 -2
  728. synth_ai/tracing_v3/turso/native_manager.py +1293 -0
  729. synth_ai/tracing_v3/utils.py +5 -4
  730. synth_ai/tunnel.py +143 -0
  731. synth_ai/tunnel_deploy.py +278 -0
  732. synth_ai/types.py +8 -0
  733. synth_ai/urls.py +11 -0
  734. synth_ai/utils/__init__.py +166 -0
  735. synth_ai/utils/agents.py +74 -0
  736. synth_ai/utils/apps.py +152 -0
  737. synth_ai/utils/base_url.py +94 -0
  738. synth_ai/utils/bin.py +39 -0
  739. synth_ai/utils/claude.py +36 -0
  740. synth_ai/utils/cli.py +284 -0
  741. synth_ai/utils/config.py +81 -0
  742. synth_ai/utils/env.py +346 -0
  743. synth_ai/utils/errors.py +85 -0
  744. synth_ai/utils/http.py +172 -0
  745. synth_ai/utils/json.py +72 -0
  746. synth_ai/utils/log_filter.py +99 -0
  747. synth_ai/utils/logging.py +198 -0
  748. synth_ai/utils/modal.py +299 -0
  749. synth_ai/utils/paths.py +95 -0
  750. synth_ai/utils/process.py +233 -0
  751. synth_ai/utils/prompts.py +39 -0
  752. synth_ai/utils/sqld.py +122 -0
  753. synth_ai/utils/ssl.py +25 -0
  754. synth_ai/utils/task_app_discovery.py +882 -0
  755. synth_ai/utils/task_app_env.py +186 -0
  756. synth_ai/utils/task_app_state.py +318 -0
  757. synth_ai/utils/tunnel/__init__.py +12 -0
  758. synth_ai/utils/tunnel/config.py +55 -0
  759. synth_ai/utils/user_config.py +137 -0
  760. synth_ai/uvicorn.py +77 -0
  761. synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
  762. synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
  763. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
  764. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
  765. synth_ai/cli/man.py +0 -106
  766. synth_ai/core/experiment.py +0 -15
  767. synth_ai/core/system.py +0 -15
  768. synth_ai/demo_registry.py +0 -258
  769. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  770. synth_ai/experimental/synth_oss.py +0 -446
  771. synth_ai/handshake.py +0 -107
  772. synth_ai/install_sqld.sh +0 -40
  773. synth_ai/learning/offline/dpo.py +0 -0
  774. synth_ai/learning/offline/providers.py +0 -7
  775. synth_ai/learning/offline/sft.py +0 -0
  776. synth_ai/learning/offline/shared.py +0 -0
  777. synth_ai/learning/online/grpo.py +0 -0
  778. synth_ai/learning/online/irft.py +0 -0
  779. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  780. synth_ai/learning/prompts/gepa.py +0 -0
  781. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  782. synth_ai/learning/prompts/mipro.py +0 -289
  783. synth_ai/learning/prompts/random_search.py +0 -246
  784. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  785. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  786. synth_ai/lm/__init__.py +0 -51
  787. synth_ai/lm/caching/constants.py +0 -6
  788. synth_ai/lm/caching/dbs.py +0 -0
  789. synth_ai/lm/caching/ephemeral.py +0 -102
  790. synth_ai/lm/caching/handler.py +0 -137
  791. synth_ai/lm/caching/initialize.py +0 -11
  792. synth_ai/lm/caching/persistent.py +0 -114
  793. synth_ai/lm/config.py +0 -110
  794. synth_ai/lm/constants.py +0 -32
  795. synth_ai/lm/core/__init__.py +0 -8
  796. synth_ai/lm/core/all.py +0 -73
  797. synth_ai/lm/core/exceptions.py +0 -7
  798. synth_ai/lm/core/main.py +0 -319
  799. synth_ai/lm/core/main_v3.py +0 -594
  800. synth_ai/lm/core/synth_models.py +0 -48
  801. synth_ai/lm/core/vendor_clients.py +0 -188
  802. synth_ai/lm/cost/monitor.py +0 -1
  803. synth_ai/lm/cost/statefulness.py +0 -1
  804. synth_ai/lm/injection.py +0 -80
  805. synth_ai/lm/overrides.py +0 -206
  806. synth_ai/lm/provider_support/__init__.py +0 -8
  807. synth_ai/lm/provider_support/anthropic.py +0 -972
  808. synth_ai/lm/provider_support/openai.py +0 -1139
  809. synth_ai/lm/provider_support/suppress_logging.py +0 -31
  810. synth_ai/lm/structured_outputs/handler.py +0 -440
  811. synth_ai/lm/structured_outputs/inject.py +0 -297
  812. synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
  813. synth_ai/lm/tools/__init__.py +0 -3
  814. synth_ai/lm/tools/base.py +0 -172
  815. synth_ai/lm/unified_interface.py +0 -202
  816. synth_ai/lm/vendors/base.py +0 -81
  817. synth_ai/lm/vendors/core/anthropic_api.py +0 -387
  818. synth_ai/lm/vendors/core/gemini_api.py +0 -292
  819. synth_ai/lm/vendors/core/mistral_api.py +0 -322
  820. synth_ai/lm/vendors/core/openai_api.py +0 -225
  821. synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
  822. synth_ai/lm/vendors/local/ollama.py +0 -0
  823. synth_ai/lm/vendors/openai_standard.py +0 -780
  824. synth_ai/lm/vendors/openai_standard_responses.py +0 -256
  825. synth_ai/lm/vendors/retries.py +0 -22
  826. synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
  827. synth_ai/lm/vendors/supported/deepseek.py +0 -69
  828. synth_ai/lm/vendors/supported/grok.py +0 -75
  829. synth_ai/lm/vendors/supported/groq.py +0 -16
  830. synth_ai/lm/vendors/supported/ollama.py +0 -15
  831. synth_ai/lm/vendors/supported/openrouter.py +0 -74
  832. synth_ai/lm/vendors/supported/together.py +0 -11
  833. synth_ai/lm/vendors/synth_client.py +0 -808
  834. synth_ai/lm/warmup.py +0 -186
  835. synth_ai/rl/secrets.py +0 -19
  836. synth_ai/scripts/verify_rewards.py +0 -100
  837. synth_ai/task/apps/grpo_crafter.py +0 -438
  838. synth_ai/tracing/__init__.py +0 -30
  839. synth_ai/tracing_v1/__init__.py +0 -33
  840. synth_ai/tracing_v3/turso/manager.py +0 -774
  841. synth_ai/v0/tracing/abstractions.py +0 -224
  842. synth_ai/v0/tracing/base_client.py +0 -91
  843. synth_ai/v0/tracing/client_manager.py +0 -131
  844. synth_ai/v0/tracing/config.py +0 -142
  845. synth_ai/v0/tracing/context.py +0 -146
  846. synth_ai/v0/tracing/decorators.py +0 -682
  847. synth_ai/v0/tracing/events/__init__.py +0 -0
  848. synth_ai/v0/tracing/events/manage.py +0 -147
  849. synth_ai/v0/tracing/events/scope.py +0 -86
  850. synth_ai/v0/tracing/events/store.py +0 -228
  851. synth_ai/v0/tracing/immediate_client.py +0 -151
  852. synth_ai/v0/tracing/local.py +0 -18
  853. synth_ai/v0/tracing/log_client_base.py +0 -73
  854. synth_ai/v0/tracing/retry_queue.py +0 -186
  855. synth_ai/v0/tracing/trackers.py +0 -515
  856. synth_ai/v0/tracing/upload.py +0 -512
  857. synth_ai/v0/tracing/utils.py +0 -9
  858. synth_ai/v0/tracing_v1/__init__.py +0 -16
  859. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  860. synth_ai/v0/tracing_v1/base_client.py +0 -91
  861. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  862. synth_ai/v0/tracing_v1/config.py +0 -142
  863. synth_ai/v0/tracing_v1/context.py +0 -146
  864. synth_ai/v0/tracing_v1/decorators.py +0 -703
  865. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  866. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  867. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  868. synth_ai/v0/tracing_v1/events/store.py +0 -228
  869. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  870. synth_ai/v0/tracing_v1/local.py +0 -18
  871. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  872. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  873. synth_ai/v0/tracing_v1/trackers.py +0 -515
  874. synth_ai/v0/tracing_v1/upload.py +0 -527
  875. synth_ai/v0/tracing_v1/utils.py +0 -9
  876. synth_ai/zyk/__init__.py +0 -30
  877. synth_ai-0.2.9.dev0.dist-info/METADATA +0 -131
  878. synth_ai-0.2.9.dev0.dist-info/RECORD +0 -444
  879. {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
  880. {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
  881. {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
  882. {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
  883. {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
  884. {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
  885. {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
  886. {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
  887. /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
  888. /synth_ai/{learning/filtering.py → py.typed} +0 -0
  889. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
  890. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1048 @@
1
+ from __future__ import annotations
2
+
3
+ import logging
4
+ from typing import Any, Dict, Iterable, Mapping, Sequence
5
+
6
+ from fastapi import HTTPException, Request
7
+ import httpx
8
+
9
+ from synth_ai.environments.examples.red.environment import PokemonRedEnvironment
10
+ from synth_ai.environments.environment.tools import EnvToolCall
11
+ from synth_ai.environments.examples.red.taskset import INSTANCE as RED_DEFAULT_INSTANCE
12
+ from synth_ai.environments.examples.red.engine_helpers.reward_library.pallet_town_progression import (
13
+ PalletTownProgressionCompositeReward,
14
+ )
15
+ from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
16
+ from synth_ai.task.contracts import (
17
+ RolloutMetrics,
18
+ RolloutRequest,
19
+ RolloutResponse,
20
+ RolloutStep,
21
+ RolloutTrajectory,
22
+ TaskInfo,
23
+ )
24
+ from synth_ai.task.server import ProxyConfig, TaskAppConfig
25
+ from synth_ai.task.tracing_utils import (
26
+ build_tracer_factory,
27
+ resolve_sft_output_dir,
28
+ resolve_tracing_db_url,
29
+ tracing_env_enabled,
30
+ )
31
+ from synth_ai.tracing_v3.session_tracer import SessionTracer
32
+ from synth_ai.tracing_v3.abstractions import EnvironmentEvent, TimeRecord
33
+ from datetime import datetime, UTC
34
+
35
+ logger = logging.getLogger(__name__)
36
+
37
+
38
def _base_task_info() -> TaskInfo:
    """Assemble the static ``TaskInfo`` descriptor for the Pokémon Red task.

    Everything here is literal metadata: the tool-call action space
    (``press_button`` and ``execute_sequence``), the list of observation
    keys, dataset and rubric identifiers, the proxy inference endpoints and
    the step limit.
    """
    # Schema for a single entry of the ``actions`` array.
    single_press_schema = {
        "type": "object",
        "properties": {
            "button": {"type": "string", "enum": ["UP", "DOWN", "LEFT", "RIGHT", "A", "B", "START", "SELECT"]},
            "frames": {"type": "integer", "minimum": 1, "maximum": 120},
        },
        "required": ["button", "frames"],
    }

    press_button_tool = {
        "name": "press_button",
        "schema": {"button": "string", "frames": "int"},
    }

    execute_sequence_tool = {
        "name": "execute_sequence",
        "description": "Execute multiple button presses in sequence. More efficient than separate calls. Recommended: 5-10 actions per call.",
        "schema": {
            "type": "object",
            "properties": {
                "actions": {
                    "type": "array",
                    "items": single_press_schema,
                    "minItems": 1,
                    "maxItems": 20,
                },
            },
            "required": ["actions"],
        },
    }

    observed_keys = [
        "position",
        "badges_earned",
        "badges_bitfield",
        "hp_status",
        "party_level",
        "party_xp",
        "in_battle",
        "step_count",
        "reward_last_step",
        "total_reward",
        "terminated",
    ]

    proxy_endpoints = {
        "openai": "/proxy/v1/chat/completions",
        "groq": "/proxy/groq/v1/chat/completions",
    }

    return TaskInfo(
        task={"id": "pokemon_red", "name": "Pokémon Red", "version": "0.1.0"},
        environment="pokemon_red",
        action_space={
            "type": "tool_call",
            "tools": [press_button_tool, execute_sequence_tool],
            "max_calls": 1,
        },
        observation={
            "summary": "GB memory-derived state with reward fields.",
            "keys": observed_keys,
        },
        dataset={"id": "pokemon_red_default", "name": "Pokémon Red Default", "version": "0.1.0"},
        rubric={"version": "1", "criteria_count": 1, "source": "inline"},
        inference={
            "supports_proxy": True,
            "tool": {"name": "press_button", "parallel_tool_calls": False},
            "endpoints": proxy_endpoints,
        },
        limits={"max_steps": 1000},
    )
103
+
104
+
105
+ def _describe_taskset() -> dict[str, Any]:
106
+ return {"id": "pokemon_red_default", "name": "Pokémon Red Default"}
107
+
108
+
109
def _provide_task_instances(seeds: Sequence[int]) -> Iterable[TaskInfo]:
    """Lazily yield one ``TaskInfo`` per seed.

    Each yielded instance copies every field from the base task info; the
    only difference is the observation mapping, which gains a ``"seed"``
    entry holding the seed it was generated for.
    """
    template = _base_task_info()
    yield from (
        TaskInfo(
            task=template.task,
            environment=template.environment,
            action_space=template.action_space,
            # Fresh dict per instance so the shared base observation is not mutated.
            observation={**template.observation, "seed": seed},
            dataset=template.dataset,
            rubric=template.rubric,
            inference=template.inference,
            limits=template.limits,
        )
        for seed in seeds
    )
122
+
123
+
124
+ def _build_action_context(prev_state: dict[str, Any], current_state: dict[str, Any]) -> dict[str, Any]:
125
+ """Build action context dict with prev_ fields for reward calculation."""
126
+ return {
127
+ "prev_map_id": prev_state.get("map_id", 0),
128
+ "prev_player_x": prev_state.get("player_x", 0),
129
+ "prev_player_y": prev_state.get("player_y", 0),
130
+ "prev_party_count": prev_state.get("party_count", 0),
131
+ "prev_in_battle": prev_state.get("in_battle", False),
132
+ "prev_text_box_active": prev_state.get("text_box_active", False),
133
+ "prev_enemy_hp_current": prev_state.get("enemy_hp_current", 0),
134
+ "prev_enemy_hp_percentage": prev_state.get("enemy_hp_percentage", 0.0),
135
+ "prev_badges": prev_state.get("badges", 0),
136
+ "prev_party_level": prev_state.get("party_level", 0),
137
+ "prev_party_xp": prev_state.get("party_xp", 0),
138
+ }
139
+
140
+
141
+ def _describe_milestone(current_state: dict[str, Any], prev_state: dict[str, Any], reward: float) -> str:
142
+ """Generate human-readable milestone description."""
143
+ descriptions = []
144
+
145
+ # Map transitions
146
+ prev_map = prev_state.get("map_id", -1)
147
+ curr_map = current_state.get("map_id", -1)
148
+ if prev_map != curr_map:
149
+ map_names = {0: "Pallet Town", 1: "Bedroom", 2: "House", 3: "Oak's Lab"}
150
+ descriptions.append(f"Moved from {map_names.get(prev_map, f'Map{prev_map}')} to {map_names.get(curr_map, f'Map{curr_map}')}")
151
+
152
+ # Party changes
153
+ prev_party = prev_state.get("party_count", 0)
154
+ curr_party = current_state.get("party_count", 0)
155
+ if curr_party > prev_party:
156
+ descriptions.append(f"Received Pokémon (party: {prev_party}→{curr_party})")
157
+
158
+ # Battle state
159
+ prev_battle = prev_state.get("in_battle", False)
160
+ curr_battle = current_state.get("in_battle", False)
161
+ if not prev_battle and curr_battle:
162
+ descriptions.append("Entered battle")
163
+ elif prev_battle and not curr_battle:
164
+ battle_outcome = current_state.get("battle_outcome", 0)
165
+ if battle_outcome == 1:
166
+ descriptions.append("Won battle")
167
+ elif battle_outcome == 2:
168
+ descriptions.append("Lost battle")
169
+
170
+ # HP damage
171
+ prev_enemy_hp = prev_state.get("enemy_hp_current", 0)
172
+ curr_enemy_hp = current_state.get("enemy_hp_current", 0)
173
+ if prev_enemy_hp > curr_enemy_hp > 0:
174
+ damage = prev_enemy_hp - curr_enemy_hp
175
+ descriptions.append(f"Dealt {damage} damage to enemy")
176
+
177
+ return " | ".join(descriptions) if descriptions else f"Progress (+{reward:.0f})"
178
+
179
+
180
+ def _calculate_outcome_score(final_state: dict[str, Any], total_reward: float) -> float:
181
+ """Calculate outcome score based on final state and total reward."""
182
+ # Normalize reward to 0-1 scale (max expected is ~700)
183
+ reward_score = min(total_reward / 700.0, 1.0)
184
+
185
+ # Bonus for having Pokemon
186
+ has_pokemon = 1.0 if final_state.get("party_count", 0) > 0 else 0.0
187
+
188
+ # Bonus for being in Oak's lab or having left it
189
+ map_id = final_state.get("map_id", -1)
190
+ map_bonus = 0.5 if map_id in [0, 3] else 0.0 # Pallet Town or Oak's Lab
191
+
192
+ # Weighted combination
193
+ return (reward_score * 0.7) + (has_pokemon * 0.2) + (map_bonus * 0.1)
194
+
195
+
196
+ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) -> RolloutResponse:
197
+ # Initialize SessionTracer for this rollout
198
+ tracer_factory = getattr(fastapi_request.app.state, "session_tracer_factory", None)
199
+ tracer_instance: SessionTracer | None = None
200
+ if callable(tracer_factory):
201
+ try:
202
+ inst = tracer_factory()
203
+ tracer_instance = inst if isinstance(inst, SessionTracer) else None
204
+ except Exception as exc:
205
+ logger.debug(f"TRACER_FACTORY_FAIL: {exc}")
206
+
207
+ # Start tracing session
208
+ if tracer_instance is not None:
209
+ try:
210
+ await tracer_instance.initialize()
211
+ await tracer_instance.start_session(
212
+ session_id=request.run_id,
213
+ metadata={
214
+ "run_id": request.run_id,
215
+ "env_name": "pokemon_red",
216
+ "policy_name": request.policy.policy_name or "default",
217
+ "seed": request.env.seed,
218
+ }
219
+ )
220
+ logger.info(f"[pokemon_red] tracing enabled for run_id={request.run_id}")
221
+ except Exception as exc:
222
+ logger.warning(f"[pokemon_red] tracing init failed: {exc}")
223
+ tracer_instance = None
224
+
225
+ async def _call_inference(policy_cfg: Mapping[str, Any], observation: Mapping[str, Any]) -> Mapping[str, Any]:
226
+ # Check if vision mode is enabled
227
+ use_vision = bool(policy_cfg.get("use_vision", False))
228
+ image_only_mode = bool(policy_cfg.get("image_only_mode", False))
229
+
230
+ # Build user message content
231
+ if use_vision and "observation_image_data_url" in observation:
232
+ # Extract image data URL
233
+ image_data_url = observation["observation_image_data_url"]
234
+
235
+ # Build state summary (text observation)
236
+ state_summary = "State summary: " + str({
237
+ k: observation.get(k)
238
+ for k in observation.keys()
239
+ if k not in ["error", "observation_image_base64", "observation_image_data_url",
240
+ "observation_image_format", "observation_image_width", "observation_image_height"]
241
+ })
242
+
243
+ # Image-only mode: only send image, no text
244
+ if image_only_mode:
245
+ user_content = [
246
+ {"type": "image_url", "image_url": {"url": image_data_url}}
247
+ ]
248
+ else:
249
+ # Vision mode with text: send both text and image
250
+ user_content = [
251
+ {"type": "text", "text": state_summary},
252
+ {"type": "image_url", "image_url": {"url": image_data_url}}
253
+ ]
254
+ else:
255
+ # Text-only mode (default)
256
+ state_summary = "State summary: " + str({
257
+ k: observation.get(k) for k in observation.keys() if k != "error"
258
+ })
259
+ user_content = state_summary
260
+
261
+ messages = [
262
+ {
263
+ "role": "system",
264
+ "content": (
265
+ "You are controlling Pokémon Red, a classic Game Boy game. You can see the game screen in the images provided. "
266
+ "Your goal is to make progress in the game. "
267
+ "IMPORTANT: Always use the 'execute_sequence' tool to submit 5-10 actions per call. "
268
+ "Do not reason about which tool to use - execute_sequence is the only tool available. "
269
+ "Choose appropriate button presses based on what you see in the game screen. "
270
+ "Plan 5-10 actions ahead to play efficiently. "
271
+ "CRITICAL: If stuck in a text box (text_box_active=True), try pressing B button first, then try A. "
272
+ "Always respond with exactly one tool call containing 5-10 actions."
273
+ ),
274
+ },
275
+ {
276
+ "role": "user",
277
+ "content": user_content,
278
+ },
279
+ ]
280
+ payload = {
281
+ "model": policy_cfg.get("model") or "qwen-2.5-7b",
282
+ "messages": messages,
283
+ "tools": [
284
+ {
285
+ "type": "function",
286
+ "function": {
287
+ "name": "execute_sequence",
288
+ "description": "Execute multiple button presses in sequence. More efficient than separate calls. ALWAYS use this tool. Plan 5-10 actions ahead to play efficiently.",
289
+ "parameters": {
290
+ "type": "object",
291
+ "properties": {
292
+ "actions": {
293
+ "type": "array",
294
+ "items": {
295
+ "type": "object",
296
+ "properties": {
297
+ "button": {
298
+ "type": "string",
299
+ "enum": ["UP", "DOWN", "LEFT", "RIGHT", "A", "B", "START", "SELECT"],
300
+ "description": "Game Boy button to press"
301
+ },
302
+ "frames": {
303
+ "type": "integer",
304
+ "minimum": 1,
305
+ "maximum": 120,
306
+ "description": "Number of frames to hold the button (30 frames = 0.5 seconds)"
307
+ }
308
+ },
309
+ "required": ["button", "frames"]
310
+ },
311
+ "minItems": 5,
312
+ "maxItems": 10,
313
+ "description": "Sequence of 5-10 button presses to execute. Plan ahead to navigate efficiently."
314
+ }
315
+ },
316
+ "required": ["actions"],
317
+ "additionalProperties": False,
318
+ },
319
+ },
320
+ }
321
+ ],
322
+ "tool_choice": {"type": "function", "function": {"name": "execute_sequence"}},
323
+ "temperature": float(policy_cfg.get("temperature") or 0.0),
324
+ "top_p": float(policy_cfg.get("top_p") or 1.0),
325
+ "max_tokens": int(policy_cfg.get("max_tokens") or 500),
326
+ }
327
+ inference_url = str(policy_cfg.get("inference_url") or "").rstrip("/")
328
+
329
+ # Determine if this is an external URL or internal proxy
330
+ is_external = inference_url.startswith("http://") or inference_url.startswith("https://")
331
+
332
+ if not inference_url:
333
+ # Prefer built-in proxy endpoints from app if no external URL
334
+ provider = (policy_cfg.get("provider") or "").lower()
335
+ if provider == "groq":
336
+ inference_url = "/proxy/groq/v1/chat/completions"
337
+ else:
338
+ inference_url = "/proxy/v1/chat/completions"
339
+ is_external = False
340
+ elif is_external:
341
+ # Add /v1/chat/completions if using OpenAI directly
342
+ if "api.openai.com" in inference_url and not inference_url.endswith("/chat/completions"):
343
+ inference_url = inference_url + "/v1/chat/completions"
344
+
345
+ # Debug: print exact payload being sent
346
+ import json as _json_debug
347
+ print(f"\n{'='*80}")
348
+ print(f"[pokemon_red] INFERENCE REQUEST DEBUG")
349
+ print(f"{'='*80}")
350
+ print(f"Inference URL: {inference_url}")
351
+ print(f"Payload keys: {list(payload.keys())}")
352
+ print(f"Payload (formatted):")
353
+ print(_json_debug.dumps(payload, indent=2)[:2000])
354
+ print(f"{'='*80}\n")
355
+
356
+
357
+ if is_external:
358
+ # External API: use direct HTTP client with auth header
359
+ headers = {}
360
+ import os
361
+ if "api.openai.com" in inference_url:
362
+ api_key = os.getenv("OPENAI_API_KEY")
363
+ if api_key:
364
+ headers["Authorization"] = f"Bearer {api_key}"
365
+ elif "modal.run" in inference_url or "synth" in inference_url.lower():
366
+ # Synth API: use SYNTH_API_KEY
367
+ api_key = os.getenv("SYNTH_API_KEY")
368
+ if api_key:
369
+ headers["Authorization"] = f"Bearer {api_key}"
370
+ print(f"[pokemon_red] Using Synth API auth: {'Bearer ' + api_key[:10] + '...' if api_key else 'NONE'}")
371
+ # For 30B-A3B models, require H200 (A100 doesn't have enough memory)
372
+ model_id = payload.get("model", "")
373
+ if "30B-A3B" in model_id or "A3B" in model_id:
374
+ headers["X-GPU-Preference"] = "H200"
375
+ print(f"[pokemon_red] Setting X-GPU-Preference: H200 (required for A3B MoE)")
376
+
377
+ async with httpx.AsyncClient(timeout=httpx.Timeout(connect=30.0, read=1800.0, write=60.0, pool=60.0)) as client: # 30 min read timeout for cold starts
378
+ resp = await client.post(inference_url, json=payload, headers=headers)
379
+ else:
380
+ # Internal proxy: use local base_url
381
+ async with httpx.AsyncClient(
382
+ base_url="http://127.0.0.1:" + str(fastapi_request.url.port or 8913),
383
+ timeout=httpx.Timeout(connect=30.0, read=1800.0, write=60.0, pool=60.0) # 30 min read timeout for cold starts
384
+ ) as client:
385
+ resp = await client.post(inference_url, json=payload)
386
+
387
+ resp.raise_for_status()
388
+ data = resp.json()
389
+
390
+ # Record user message (system + user)
391
+ if tracer_instance is not None:
392
+ try:
393
+ print(f"[pokemon_red] Recording messages: tracer_instance={tracer_instance is not None}", flush=True)
394
+ # Record system message
395
+ await tracer_instance.record_message(
396
+ content=messages[0].get("content", ""),
397
+ message_type="system",
398
+ )
399
+ # Record user message
400
+ user_msg_content = messages[1].get("content", "")
401
+ if isinstance(user_msg_content, list):
402
+ # For multimodal content, extract text summary
403
+ text_parts = [item.get("text", "") for item in user_msg_content if item.get("type") == "text"]
404
+ user_msg_content = " ".join(text_parts) if text_parts else str(user_msg_content)
405
+ await tracer_instance.record_message(
406
+ content=user_msg_content,
407
+ message_type="user",
408
+ )
409
+ print(f"[pokemon_red] Recorded user messages", flush=True)
410
+ except Exception as exc:
411
+ logger.debug(f"[pokemon_red] Failed to record user messages: {exc}")
412
+ print(f"[pokemon_red] ERROR recording user messages: {exc}", flush=True)
413
+
414
+ # Debug logging for tool calls
415
+ print(f"\n{'='*80}")
416
+ print(f"[pokemon_red] INFERENCE RESPONSE DEBUG")
417
+ print(f"{'='*80}")
418
+ print(f"Response status: {resp.status_code}")
419
+ print(f"Response keys: {list(data.keys())}")
420
+ choices = data.get("choices") or []
421
+ if choices:
422
+ message = choices[0].get("message") or {}
423
+ print(f"Message keys: {list(message.keys())}")
424
+ print(f"Message content preview: {str(message.get('content', ''))[:200]}")
425
+ print(f"Tool calls: {message.get('tool_calls', [])}")
426
+ print(f"Full message (formatted):")
427
+ print(_json_debug.dumps(message, indent=2)[:1500])
428
+ print(f"{'='*80}\n")
429
+
430
+ # Record assistant message/tool calls
431
+ if tracer_instance is not None:
432
+ try:
433
+ message = choices[0].get("message", {}) if choices else {}
434
+ tool_calls = message.get("tool_calls", [])
435
+ content = message.get("content", "")
436
+
437
+ if tool_calls:
438
+ # Record tool calls as assistant message
439
+ import json as _json_record
440
+ await tracer_instance.record_message(
441
+ content=_json_record.dumps(tool_calls) if tool_calls else (content or ""),
442
+ message_type="assistant",
443
+ metadata={"is_tool_call": True} if tool_calls else {},
444
+ )
445
+ elif content:
446
+ # Record text content as assistant message
447
+ await tracer_instance.record_message(
448
+ content=content,
449
+ message_type="assistant",
450
+ )
451
+ except Exception as exc:
452
+ logger.debug(f"[pokemon_red] Failed to record assistant message: {exc}")
453
+
454
+ # Extract first tool call
455
+ if not choices:
456
+ print("[pokemon_red] WARNING: No choices in inference response")
457
+ return {}
458
+ message = choices[0].get("message") or {}
459
+ raw_calls = message.get("tool_calls") or []
460
+
461
+ # If no structured tool_calls, try parsing XML tool calls from content
462
+ if not raw_calls:
463
+ content = message.get("content", "")
464
+ if content and "<tool_call>" in content:
465
+ import re as _re
466
+ import json as _json_parse
467
+ # Parse XML tool calls: <tool_call>{...}</tool_call>
468
+ xml_pattern = r'<tool_call>\s*({.*?})\s*</tool_call>'
469
+ matches = _re.findall(xml_pattern, content, _re.DOTALL)
470
+ if matches:
471
+ print(f"[pokemon_red] Parsed {len(matches)} XML tool call(s) from content")
472
+ try:
473
+ tool_data = _json_parse.loads(matches[0])
474
+ tool_name = tool_data.get("name", "")
475
+ args = tool_data.get("arguments", {})
476
+
477
+ print(f"[pokemon_red] Parsed tool: {tool_name}, args: {str(args)[:200]}")
478
+
479
+ # Handle execute_sequence tool
480
+ if tool_name == "execute_sequence":
481
+ return {"actions": args.get("actions", [])}
482
+
483
+ # Handle press_button tool (legacy single action)
484
+ if tool_name == "press_button":
485
+ return {"button": args.get("button"), "frames": int(args.get("frames") or 30)}
486
+ except Exception as parse_err:
487
+ print(f"[pokemon_red] Error parsing XML tool call: {parse_err}")
488
+
489
+ if not raw_calls:
490
+ print(f"[pokemon_red] WARNING: No tool_calls in response. Content: {message.get('content', '')[:200]}")
491
+ return {}
492
+
493
+ f = raw_calls[0].get("function") or {}
494
+ tool_name = f.get("name", "")
495
+ args = f.get("arguments")
496
+ import json as _json
497
+ try:
498
+ parsed_args = _json.loads(args) if isinstance(args, str) else dict(args or {})
499
+ except Exception:
500
+ parsed_args = {}
501
+
502
+ # Handle execute_sequence tool
503
+ if tool_name == "execute_sequence":
504
+ return {"actions": parsed_args.get("actions", [])}
505
+
506
+ # Handle press_button tool (legacy single action)
507
+ return {"button": parsed_args.get("button"), "frames": int(parsed_args.get("frames") or 30)}
508
+
509
+ # Initialize reward function
510
+ reward_fn = PalletTownProgressionCompositeReward()
511
+
512
+ env = PokemonRedEnvironment(RED_DEFAULT_INSTANCE)
513
+ obs0 = await env.initialize()
514
+
515
+ # Track cumulative stats
516
+ total_reward = 0.0
517
+ all_reward_components: list[dict[str, Any]] = []
518
+ milestone_events: list[dict[str, Any]] = []
519
+
520
+ steps: list[RolloutStep] = [
521
+ RolloutStep(obs=obs0, tool_calls=[], reward=0.0, done=False, info={"step_type": "initial"}),
522
+ ]
523
+
524
+ # Track previous state for reward calculation
525
+ prev_state = dict(obs0) if isinstance(obs0, Mapping) else {}
526
+
527
+ # Process all ops (explicit actions)
528
+ final_obs = obs0
529
+ for step_idx, op in enumerate(request.ops or []):
530
+ macro = None
531
+ if isinstance(op, dict):
532
+ macro = op.get("action") or op
533
+
534
+ if isinstance(macro, dict):
535
+ # Check if this is an execute_sequence call
536
+ if "actions" in macro:
537
+ # Handle execute_sequence: multiple actions in one call
538
+ actions_list = macro.get("actions", [])
539
+ sequence_reward = 0.0
540
+ sequence_tool_calls = []
541
+
542
+ for action_item in actions_list:
543
+ button = action_item.get("button", "A")
544
+ frames = int(action_item.get("frames", 1))
545
+
546
+ obs1 = await env.step(EnvToolCall(tool="press_button", args={"button": button, "frames": frames}))
547
+ current_state = dict(obs1) if isinstance(obs1, Mapping) else {}
548
+ action_context = _build_action_context(prev_state, current_state)
549
+ step_reward = await reward_fn.score(current_state, action_context)
550
+
551
+ # Record environment event
552
+ if tracer_instance is not None:
553
+ try:
554
+ event = EnvironmentEvent(
555
+ system_instance_id="environment:pokemon_red",
556
+ time_record=TimeRecord(event_time=datetime.now(UTC).timestamp()),
557
+ reward=step_reward,
558
+ terminated=False,
559
+ truncated=False,
560
+ system_state_before={"map_id": prev_state.get("map_id"), "position": f"({prev_state.get('player_x')},{prev_state.get('player_y')})"},
561
+ system_state_after={"map_id": current_state.get("map_id"), "position": f"({current_state.get('player_x')},{current_state.get('player_y')})"},
562
+ metadata={"step": step_idx + 1, "button": button, "run_id": request.run_id},
563
+ )
564
+ await tracer_instance.record_event(event)
565
+ except Exception as exc:
566
+ logger.debug(f"[pokemon_red] Failed to record environment event: {exc}")
567
+
568
+ sequence_reward += step_reward
569
+ sequence_tool_calls.append({"tool": "press_button", "args": {"button": button, "frames": frames}})
570
+
571
+ if step_reward > 0:
572
+ reward_component = {
573
+ "step": step_idx + 1,
574
+ "reward": step_reward,
575
+ "button": button,
576
+ "map_id": current_state.get("map_id"),
577
+ "position": f"({current_state.get('player_x')},{current_state.get('player_y')})",
578
+ }
579
+ all_reward_components.append(reward_component)
580
+ milestone_events.append({
581
+ "type": "milestone",
582
+ "step": step_idx + 1,
583
+ "reward": step_reward,
584
+ "description": _describe_milestone(current_state, prev_state, step_reward),
585
+ })
586
+
587
+ final_obs = obs1
588
+ prev_state = current_state
589
+
590
+ total_reward += sequence_reward
591
+ step_info = {
592
+ "step_type": "sequence",
593
+ "step_idx": step_idx,
594
+ "actions_count": len(actions_list),
595
+ "cumulative_reward": total_reward,
596
+ }
597
+ if sequence_reward > 0:
598
+ step_info["sequence_reward"] = sequence_reward
599
+
600
+ steps.append(
601
+ RolloutStep(
602
+ obs=final_obs,
603
+ tool_calls=sequence_tool_calls,
604
+ reward=sequence_reward,
605
+ done=False,
606
+ info=step_info,
607
+ )
608
+ )
609
+ else:
610
+ # Handle single press_button call
611
+ button = macro.get("button") or "A"
612
+ frames = int(macro.get("frames") or 1)
613
+ obs1 = await env.step(EnvToolCall(tool="press_button", args={"button": button, "frames": frames}))
614
+
615
+ # Calculate step reward
616
+ current_state = dict(obs1) if isinstance(obs1, Mapping) else {}
617
+ action_context = _build_action_context(prev_state, current_state)
618
+ step_reward = await reward_fn.score(current_state, action_context)
619
+
620
+ # Record environment event
621
+ if tracer_instance is not None:
622
+ try:
623
+ event = EnvironmentEvent(
624
+ system_instance_id="environment:pokemon_red",
625
+ time_record=TimeRecord(event_time=datetime.now(UTC).timestamp()),
626
+ reward=step_reward,
627
+ terminated=False,
628
+ truncated=False,
629
+ system_state_before={"map_id": prev_state.get("map_id"), "position": f"({prev_state.get('player_x')},{prev_state.get('player_y')})"},
630
+ system_state_after={"map_id": current_state.get("map_id"), "position": f"({current_state.get('player_x')},{current_state.get('player_y')})"},
631
+ metadata={"step": step_idx + 1, "button": button, "run_id": request.run_id},
632
+ )
633
+ await tracer_instance.record_event(event)
634
+ except Exception as exc:
635
+ logger.debug(f"[pokemon_red] Failed to record environment event: {exc}")
636
+ total_reward += step_reward
637
+
638
+ # Track reward components if non-zero
639
+ step_info: dict[str, Any] = {"step_type": "action", "step_idx": step_idx}
640
+ if step_reward > 0:
641
+ reward_component = {
642
+ "step": step_idx + 1,
643
+ "reward": step_reward,
644
+ "button": button,
645
+ "map_id": current_state.get("map_id"),
646
+ "position": f"({current_state.get('player_x')},{current_state.get('player_y')})",
647
+ }
648
+ all_reward_components.append(reward_component)
649
+ step_info["reward_component"] = reward_component
650
+
651
+ # Track milestone events
652
+ milestone_events.append({
653
+ "type": "milestone",
654
+ "step": step_idx + 1,
655
+ "reward": step_reward,
656
+ "description": _describe_milestone(current_state, prev_state, step_reward),
657
+ })
658
+
659
+ step_info["cumulative_reward"] = total_reward
660
+
661
+ steps.append(
662
+ RolloutStep(
663
+ obs=obs1,
664
+ tool_calls=[{"tool": "press_button", "args": {"button": button, "frames": frames}}],
665
+ reward=step_reward,
666
+ done=False,
667
+ info=step_info,
668
+ )
669
+ )
670
+ final_obs = obs1
671
+ prev_state = current_state
672
+ else:
673
+ # Attempt policy-driven step if policy.config present
674
+ policy_cfg = request.policy.config or {}
675
+ if policy_cfg:
676
+ print(f"[pokemon_red] Calling _call_inference: tracer_instance={tracer_instance is not None}", flush=True)
677
+ try:
678
+ action = await _call_inference(policy_cfg, final_obs if isinstance(final_obs, Mapping) else {})
679
+
680
+ # Handle execute_sequence from policy
681
+ if "actions" in action:
682
+ actions_list = action.get("actions", [])
683
+ sequence_reward = 0.0
684
+ sequence_tool_calls = []
685
+
686
+ for action_item in actions_list:
687
+ button = action_item.get("button", "A")
688
+ frames = int(action_item.get("frames", 30))
689
+
690
+ obs1 = await env.step(EnvToolCall(tool="press_button", args={"button": button, "frames": frames}))
691
+ current_state = dict(obs1) if isinstance(obs1, Mapping) else {}
692
+ action_context = _build_action_context(prev_state, current_state)
693
+ step_reward = await reward_fn.score(current_state, action_context)
694
+
695
+ # Record environment event
696
+ if tracer_instance is not None:
697
+ try:
698
+ event = EnvironmentEvent(
699
+ system_instance_id="environment:pokemon_red",
700
+ time_record=TimeRecord(event_time=datetime.now(UTC).timestamp()),
701
+ reward=step_reward,
702
+ terminated=False,
703
+ truncated=False,
704
+ system_state_before={"map_id": prev_state.get("map_id"), "position": f"({prev_state.get('player_x')},{prev_state.get('player_y')})"},
705
+ system_state_after={"map_id": current_state.get("map_id"), "position": f"({current_state.get('player_x')},{current_state.get('player_y')})"},
706
+ metadata={"step": step_idx + 1, "button": button, "run_id": request.run_id},
707
+ )
708
+ await tracer_instance.record_event(event)
709
+ except Exception as exc:
710
+ logger.debug(f"[pokemon_red] Failed to record environment event: {exc}")
711
+
712
+ sequence_reward += step_reward
713
+ sequence_tool_calls.append({"tool": "press_button", "args": {"button": button, "frames": frames}})
714
+
715
+ if step_reward > 0:
716
+ reward_component = {
717
+ "step": step_idx + 1,
718
+ "reward": step_reward,
719
+ "button": button,
720
+ "map_id": current_state.get("map_id"),
721
+ "position": f"({current_state.get('player_x')},{current_state.get('player_y')})",
722
+ }
723
+ all_reward_components.append(reward_component)
724
+ milestone_events.append({
725
+ "type": "milestone",
726
+ "step": step_idx + 1,
727
+ "reward": step_reward,
728
+ "description": _describe_milestone(current_state, prev_state, step_reward),
729
+ })
730
+
731
+ final_obs = obs1
732
+ prev_state = current_state
733
+
734
+ total_reward += sequence_reward
735
+ step_info = {
736
+ "step_type": "policy_sequence",
737
+ "step_idx": step_idx,
738
+ "actions_count": len(actions_list),
739
+ "cumulative_reward": total_reward,
740
+ }
741
+ if sequence_reward > 0:
742
+ step_info["sequence_reward"] = sequence_reward
743
+
744
+ steps.append(
745
+ RolloutStep(
746
+ obs=final_obs,
747
+ tool_calls=sequence_tool_calls,
748
+ reward=sequence_reward,
749
+ done=False,
750
+ info=step_info,
751
+ )
752
+ )
753
+
754
+ # Handle single button press from policy
755
+ elif action.get("button"):
756
+ obs1 = await env.step(EnvToolCall(tool="press_button", args=action))
757
+
758
+ # Calculate step reward
759
+ current_state = dict(obs1) if isinstance(obs1, Mapping) else {}
760
+ action_context = _build_action_context(prev_state, current_state)
761
+ step_reward = await reward_fn.score(current_state, action_context)
762
+ total_reward += step_reward
763
+
764
+ step_info_policy: dict[str, Any] = {
765
+ "step_type": "policy",
766
+ "step_idx": step_idx,
767
+ "cumulative_reward": total_reward,
768
+ "proxy": True,
769
+ }
770
+ if step_reward > 0:
771
+ step_info_policy["reward_earned"] = step_reward
772
+
773
+ steps.append(
774
+ RolloutStep(
775
+ obs=obs1,
776
+ tool_calls=[{"tool": "press_button", "args": action}],
777
+ reward=step_reward,
778
+ done=False,
779
+ info=step_info_policy,
780
+ )
781
+ )
782
+ final_obs = obs1
783
+ prev_state = current_state
784
+ except Exception:
785
+ pass
786
+
787
+ # Calculate outcome score based on milestones achieved
788
+ final_state = dict(final_obs) if isinstance(final_obs, Mapping) else {}
789
+ outcome_score = _calculate_outcome_score(final_state, total_reward)
790
+
791
+ metrics = RolloutMetrics(
792
+ episode_returns=[total_reward],
793
+ mean_return=total_reward,
794
+ num_steps=len(steps),
795
+ num_episodes=1,
796
+ outcome_score=outcome_score,
797
+ details={
798
+ "total_reward": total_reward,
799
+ "reward_components": all_reward_components,
800
+ "milestone_events": milestone_events,
801
+ "final_map": final_state.get("map_id"),
802
+ "party_count": final_state.get("party_count", 0),
803
+ "badges": final_state.get("badges", 0),
804
+ },
805
+ )
806
+
807
+ # Extract inference_url from policy config
808
+ inference_url = (policy_cfg or {}).get("inference_url")
809
+
810
+ trajectory = RolloutTrajectory(
811
+ env_id="pokemon_red",
812
+ policy_id=request.policy.policy_id or "policy",
813
+ steps=steps,
814
+ final={"observation": final_obs, "reward": total_reward},
815
+ length=len(steps),
816
+ inference_url=inference_url, # NEW: Required for trace correlation
817
+ )
818
+
819
+ # Record outcome rewards and end session
820
+ trace_payload = None
821
+ if tracer_instance is not None:
822
+ try:
823
+ # Count achievements (milestones)
824
+ achievements_count = len(milestone_events)
825
+
826
+ # Build metadata with all relevant info
827
+ reward_metadata = {
828
+ "run_id": request.run_id,
829
+ "env_name": "pokemon_red",
830
+ "final_map": final_state.get("map_id", -1),
831
+ "party_count": final_state.get("party_count", 0),
832
+ "badges": final_state.get("badges", 0),
833
+ "steps": len(steps),
834
+ "milestone_events": milestone_events,
835
+ "reward_components": all_reward_components,
836
+ }
837
+
838
+ # Record outcome reward to Turso
839
+ await tracer_instance.record_outcome_reward(
840
+ total_reward=int(total_reward),
841
+ achievements_count=achievements_count,
842
+ total_steps=len(steps),
843
+ reward_metadata=reward_metadata,
844
+ )
845
+ logger.info(f"[pokemon_red] recorded outcome: reward={total_reward}, achievements={achievements_count}")
846
+
847
+ # End session and get trace
848
+ session_trace = await tracer_instance.end_session()
849
+
850
+ # Build trace payload if requested - ALWAYS use full format when return_trace=True
851
+ # This ensures markov_blanket_message_history is always included
852
+ record_config = getattr(request, 'record', None)
853
+ print(f"[pokemon_red] TRACE DEBUG: record_config={record_config}, return_trace={getattr(record_config, 'return_trace', None) if record_config else None}, session_trace={session_trace is not None}", flush=True)
854
+ if session_trace:
855
+ print(f"[pokemon_red] TRACE DEBUG: IMMEDIATELY AFTER end_session: session_trace has {len(session_trace.markov_blanket_message_history)} messages, {len(session_trace.event_history)} events", flush=True)
856
+ print(f"[pokemon_red] TRACE DEBUG: session_trace.markov_blanket_message_history type: {type(session_trace.markov_blanket_message_history)}", flush=True)
857
+ if session_trace.markov_blanket_message_history:
858
+ print(f"[pokemon_red] TRACE DEBUG: First message type: {type(session_trace.markov_blanket_message_history[0])}, content: {str(session_trace.markov_blanket_message_history[0].content)[:100]}", flush=True)
859
+ else:
860
+ print(f"[pokemon_red] TRACE DEBUG: WARNING - markov_blanket_message_history is EMPTY RIGHT AFTER end_session!", flush=True)
861
+
862
+ if record_config and getattr(record_config, 'return_trace', False) and session_trace:
863
+ # Always return full trace with all messages and events (no compact format)
864
+ import dataclasses
865
+ trace_payload = session_trace.to_dict()
866
+ print(f"[pokemon_red] TRACE DEBUG: to_dict() returned keys: {list(trace_payload.keys())}", flush=True)
867
+ print(f"[pokemon_red] TRACE DEBUG: to_dict() markov_blanket_message_history length: {len(trace_payload.get('markov_blanket_message_history', []))}", flush=True)
868
+
869
+ # Always manually serialize messages and events to ensure they're included
870
+ # asdict() may not recursively serialize nested dataclasses correctly
871
+ from synth_ai.tracing_v3.abstractions import SessionEventMarkovBlanketMessage, BaseEvent
872
+ if session_trace.markov_blanket_message_history:
873
+ print(f"[pokemon_red] TRACE DEBUG: Manually serializing {len(session_trace.markov_blanket_message_history)} messages", flush=True)
874
+ trace_payload["markov_blanket_message_history"] = [
875
+ dataclasses.asdict(msg) if isinstance(msg, SessionEventMarkovBlanketMessage) else (msg if isinstance(msg, dict) else str(msg))
876
+ for msg in session_trace.markov_blanket_message_history
877
+ ]
878
+ else:
879
+ print(f"[pokemon_red] TRACE DEBUG: WARNING - session_trace.markov_blanket_message_history is EMPTY!", flush=True)
880
+ if session_trace.event_history:
881
+ print(f"[pokemon_red] TRACE DEBUG: Manually serializing {len(session_trace.event_history)} events", flush=True)
882
+ trace_payload["event_history"] = [
883
+ dataclasses.asdict(evt) if isinstance(evt, BaseEvent) else (evt if isinstance(evt, dict) else str(evt))
884
+ for evt in session_trace.event_history
885
+ ]
886
+ else:
887
+ print(f"[pokemon_red] TRACE DEBUG: WARNING - session_trace.event_history is EMPTY!", flush=True)
888
+ print(f"[pokemon_red] TRACE DEBUG: Final trace payload has {len(trace_payload.get('markov_blanket_message_history', []))} messages, {len(trace_payload.get('event_history', []))} events", flush=True)
889
+ print(f"[pokemon_red] TRACE DEBUG: Final trace payload keys: {list(trace_payload.keys())}", flush=True)
890
+ else:
891
+ print(f"[pokemon_red] TRACE DEBUG: SKIPPING trace payload build - record_config={record_config}, return_trace={getattr(record_config, 'return_trace', None) if record_config else None}, session_trace={session_trace is not None}", flush=True)
892
+ except Exception as exc:
893
+ logger.warning(f"[pokemon_red] tracing finalization failed: {exc}")
894
+ print(f"[pokemon_red] TRACE DEBUG EXCEPTION: {exc}", flush=True)
895
+ import traceback
896
+ print(f"[pokemon_red] TRACE DEBUG EXCEPTION TRACEBACK: {traceback.format_exc()}", flush=True)
897
+
898
+ # Fallback trace payload if no tracer but CLI needs it
899
+ if trace_payload is None:
900
+ record_config = getattr(request, 'record', None)
901
+ print(f"[pokemon_red] TRACE DEBUG: trace_payload is None, using fallback. record_config={record_config}, return_trace={getattr(record_config, 'return_trace', None) if record_config else None}", flush=True)
902
+ if record_config and getattr(record_config, 'return_trace', False):
903
+ trace_payload = {
904
+ "session_id": request.run_id,
905
+ "created_at": import_datetime().now().isoformat(),
906
+ "metadata": {
907
+ "run_id": request.run_id,
908
+ "env_name": "pokemon_red",
909
+ "total_reward": int(total_reward),
910
+ "final_map": final_state.get("map_id", -1),
911
+ "party_count": final_state.get("party_count", 0),
912
+ "badges": final_state.get("badges", 0),
913
+ "steps": len(steps),
914
+ },
915
+ "num_timesteps": len(steps),
916
+ "num_events": len(steps),
917
+ "num_messages": len(steps) * 2,
918
+ }
919
+ print(f"[pokemon_red] TRACE DEBUG: Created fallback trace_payload with keys: {list(trace_payload.keys())}", flush=True)
920
+
921
+ print(f"[pokemon_red] TRACE DEBUG: About to return RolloutResponse with trace_payload={trace_payload is not None}, keys={list(trace_payload.keys()) if trace_payload else []}", flush=True)
922
+ if trace_payload:
923
+ import json as _json_final
924
+ markov_msgs = trace_payload.get('markov_blanket_message_history', [])
925
+ event_history = trace_payload.get('event_history', [])
926
+ print(f"[pokemon_red] TRACE DEBUG: trace_payload markov_blanket_message_history length: {len(markov_msgs)}", flush=True)
927
+ print(f"[pokemon_red] TRACE DEBUG: trace_payload event_history length: {len(event_history)}", flush=True)
928
+ if markov_msgs:
929
+ print(f"[pokemon_red] TRACE DEBUG: First markov message type: {type(markov_msgs[0]) if markov_msgs else None}", flush=True)
930
+ print(f"[pokemon_red] TRACE DEBUG: First markov message (first 500 chars): {_json_final.dumps(markov_msgs[0] if markov_msgs else {}, indent=2, default=str)[:500]}", flush=True)
931
+ else:
932
+ print(f"[pokemon_red] TRACE DEBUG: WARNING - markov_blanket_message_history is EMPTY in final trace_payload!", flush=True)
933
+
934
+ response = RolloutResponse(
935
+ run_id=request.run_id,
936
+ trajectories=[trajectory],
937
+ branches={},
938
+ metrics=metrics,
939
+ aborted=False,
940
+ ops_executed=len(request.ops or []),
941
+ trace=trace_payload,
942
+ )
943
+
944
+ # Final check: inspect what's actually in the response
945
+ if response.trace:
946
+ import json as _json_response
947
+ resp_markov = response.trace.get('markov_blanket_message_history', []) if isinstance(response.trace, dict) else []
948
+ print(f"[pokemon_red] TRACE DEBUG: Response.trace markov_blanket_message_history length: {len(resp_markov)}", flush=True)
949
+
950
+ return response
951
+
952
+
953
def import_datetime():
    """Return the ``datetime.datetime`` class, importing the module lazily.

    Callers use the returned class directly, e.g.
    ``import_datetime().now().isoformat()`` for trace timestamps.
    """
    import datetime as _datetime_module

    return _datetime_module.datetime
957
+
958
+
959
def build_config() -> TaskAppConfig:
    """Assemble the ``TaskAppConfig`` for the Pokémon Red demo task app.

    Resolves the tracing settings, collects them (plus the optional tracer
    factory and SFT output directory) into the shared ``app_state`` dict,
    announces tracing when it is enabled, and returns the config that wires in
    the task-info/rollout callables and the OpenAI/Groq proxy.
    """
    task_info = _base_task_info()

    # Tracing-related settings, resolved once per config build.
    tracing_on = tracing_env_enabled()
    db_url = resolve_tracing_db_url()
    make_tracer = build_tracer_factory(SessionTracer, enabled=tracing_on, db_url=db_url)
    sft_dir = resolve_sft_output_dir()

    # Optional entries are only added when they were actually resolved.
    state: dict[str, Any] = {"tracing_enabled": tracing_on}
    if make_tracer is not None:
        state["session_tracer_factory"] = make_tracer
    if sft_dir:
        state["sft_output_dir"] = sft_dir

    if tracing_on:
        banner = f"[task:tracing] enabled (db={db_url or 'default'})"
        logger.info(banner)
        print(banner, flush=True)

    proxy_settings = ProxyConfig(
        enable_openai=True,
        enable_groq=True,
        system_hint=(
            "You control Pokémon Red. Use 'execute_sequence' with 5-10 actions to play efficiently. "
            "Plan ahead: navigate rooms, advance dialogue, battle strategically. "
            "Example: {\"tool\": \"execute_sequence\", \"args\": {\"actions\": [{\"button\": \"DOWN\", \"frames\": 30}, ...]}}"
        ),
    )

    return TaskAppConfig(
        app_id="pokemon_red",
        name="Pokémon Red Task App",
        description="Expose Pokémon Red via Synth task framework (demo).",
        base_task_info=task_info,
        describe_taskset=_describe_taskset,
        provide_task_instances=_provide_task_instances,
        rollout=rollout_executor,
        dataset_registry=None,
        proxy=proxy_settings,
        app_state=state,
        require_api_key=False,
        expose_debug_env=True,
        cors_origins=["*"],
    )
1006
+
1007
+
1008
# Python packages installed into the Modal image for this task app.
_POKEMON_RED_PIP_PACKAGES = (
    "fastapi>=0.100.0",
    "uvicorn>=0.23.0",
    "pydantic>=2.0.0",
    "numpy>=1.24.0",
    "aiohttp>=3.8.0",
    "httpx>=0.24.0",
    "python-dotenv>=1.0.1",
    # Tracing/DB runtime deps
    "sqlalchemy>=2.0.42",
    "aiosqlite>=0.21.0",
    "greenlet>=3.2.3",
    # Pokemon Red environment
    "pyboy>=2.0.0",
    "pillow>=9.0.0",
)

# (local path, path inside the container) pairs; mounts the repo so local
# modules resolve when deployed on Modal.
# NOTE(review): these are absolute paths on one developer's machine and will
# not exist elsewhere - presumably they should be derived from the repository
# root; confirm the intended layout before changing.
_POKEMON_RED_LOCAL_DIRS = (
    ("/Users/joshpurtell/Documents/GitHub/synth-ai", "/opt/synth_ai_repo"),
    ("/Users/joshpurtell/Documents/GitHub/synth-ai/synth_ai", "/opt/synth_ai_repo/synth_ai"),
    ("/Users/joshpurtell/Documents/GitHub/synth-ai/examples/task_apps/pokemon_red", "/opt/synth_ai_repo/examples/task_apps/pokemon_red"),
)

# Register the app under "pokemon_red" (alias "pokemon_red_demo") at import time.
register_task_app(
    entry=TaskAppEntry(
        app_id="pokemon_red",
        description="Pokémon Red demo task app with vision support",
        config_factory=build_config,
        aliases=("pokemon_red_demo",),
        env_files=(),
        modal=ModalDeploymentConfig(
            app_name="pokemon-red-vision-task-app",
            python_version="3.11",
            pip_packages=_POKEMON_RED_PIP_PACKAGES,
            extra_local_dirs=_POKEMON_RED_LOCAL_DIRS,
            secret_names=("openai-api-key", "groq-api-key"),
            memory=16384,
            cpu=4.0,
            max_containers=10,
        ),
    )
)
1047
+
1048
+