synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (291):
  1. examples/multi_step/configs/README_verilog_rl.md +77 -0
  2. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  3. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  4. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  5. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  6. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +17 -5
  7. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  8. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  9. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  10. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  11. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  12. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  13. examples/multi_step/readme.md +48 -0
  14. examples/multi_step/verilog_rl_lora.md +218 -0
  15. examples/qwen_coder/configs/coder_lora_30b.toml +1 -1
  16. examples/sft/evaluate.py +2 -0
  17. examples/sft/generate_traces.py +2 -0
  18. examples/swe/task_app/grpo_swe_mini.py +56 -26
  19. examples/swe/task_app/hosted/rollout.py +42 -0
  20. examples/swe/task_app/hosted/test_service.py +5 -6
  21. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  22. examples/task_apps/TESTING.md +275 -0
  23. examples/task_apps/__init__.py +0 -0
  24. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  25. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  26. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  27. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  28. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  29. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  30. examples/task_apps/crafter/__init__.py +0 -0
  31. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  32. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  33. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  34. examples/task_apps/crafter/task_app/__init__.py +5 -0
  35. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +324 -21
  36. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
  37. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  38. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +76 -7
  39. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  40. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +25 -3
  41. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +77 -4
  42. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +117 -9
  43. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
  44. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +218 -0
  45. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  46. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  47. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  48. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  49. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  50. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  51. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  52. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  53. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  54. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  55. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  56. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  57. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  58. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  59. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  60. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  61. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  62. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  63. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  64. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  65. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  66. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  67. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  68. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  69. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  70. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  71. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  72. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  73. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  74. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  75. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  76. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  77. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  78. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  79. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  80. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  81. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  82. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  83. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  84. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  85. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  86. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  87. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  88. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  89. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  90. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  91. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  92. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  93. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  94. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  95. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  96. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  97. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  98. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  99. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  100. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  101. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  102. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  103. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  104. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  105. examples/task_apps/enron/__init__.py +1 -0
  106. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  107. examples/task_apps/enron/filter_sft.toml +5 -0
  108. examples/task_apps/enron/task_app/README.md +14 -0
  109. examples/task_apps/enron/task_app/__init__.py +1 -0
  110. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  111. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  112. examples/task_apps/enron/tests/__init__.py +4 -0
  113. examples/task_apps/enron/tests/conftest.py +115 -0
  114. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  115. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  116. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  117. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  118. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  119. examples/task_apps/math/__init__.py +0 -0
  120. examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
  121. examples/task_apps/pokemon_battle/__init__.py +2 -0
  122. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  123. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  124. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  125. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  126. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  127. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  128. examples/task_apps/pokemon_red/README.md +357 -0
  129. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  130. examples/task_apps/pokemon_red/__init__.py +3 -0
  131. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  132. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
  133. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  134. examples/task_apps/pokemon_red/task_app.py +799 -0
  135. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  136. examples/task_apps/sokoban/README.md +307 -0
  137. examples/task_apps/sokoban/__init__.py +3 -0
  138. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  139. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  140. examples/task_apps/sokoban/filter_sft.toml +5 -0
  141. examples/task_apps/sokoban/task_app.py +1058 -0
  142. examples/task_apps/sokoban/tests/__init__.py +4 -0
  143. examples/task_apps/sokoban/tests/conftest.py +113 -0
  144. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  145. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  146. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  147. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  148. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  149. examples/task_apps/verilog/__init__.py +1 -0
  150. examples/task_apps/verilog/eval_groq_qwen32b.toml +24 -0
  151. examples/task_apps/verilog/filter_sft.toml +5 -0
  152. examples/task_apps/verilog/task_app/README.md +12 -0
  153. examples/task_apps/verilog/task_app/__init__.py +1 -0
  154. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  155. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  156. examples/task_apps/verilog/tests/__init__.py +4 -0
  157. examples/task_apps/verilog/tests/conftest.py +115 -0
  158. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  159. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  160. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  161. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  162. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  163. examples/vlm/crafter_openai_vlm_agent.py +4 -4
  164. examples/vlm/run_crafter_vlm_benchmark.py +4 -4
  165. examples/warming_up_to_rl/groq_test.py +2 -0
  166. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  167. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  168. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  169. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  170. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  171. examples/workflows/__init__.py +0 -0
  172. examples/workflows/math_rl/__init__.py +0 -0
  173. examples/workflows/math_rl/download_dataset.py +80 -0
  174. synth_ai/__init__.py +2 -2
  175. synth_ai/api/models/supported.py +1 -0
  176. synth_ai/api/train/builders.py +25 -11
  177. synth_ai/api/train/cli.py +12 -6
  178. synth_ai/api/train/configs/__init__.py +10 -10
  179. synth_ai/api/train/configs/rl.py +5 -4
  180. synth_ai/api/train/configs/sft.py +4 -3
  181. synth_ai/api/train/env_resolver.py +5 -2
  182. synth_ai/api/train/supported_algos.py +10 -5
  183. synth_ai/api/train/utils.py +7 -4
  184. synth_ai/cli/__init__.py +48 -59
  185. synth_ai/cli/_modal_wrapper.py +3 -2
  186. synth_ai/cli/_storage.py +4 -3
  187. synth_ai/cli/_validate_task_app.py +11 -0
  188. synth_ai/cli/balance.py +4 -3
  189. synth_ai/cli/calc.py +2 -2
  190. synth_ai/cli/demo.py +14 -7
  191. synth_ai/cli/legacy_root_backup.py +1 -1
  192. synth_ai/cli/recent.py +1 -1
  193. synth_ai/cli/rl_demo.py +8 -7
  194. synth_ai/cli/root.py +0 -97
  195. synth_ai/cli/status.py +1 -1
  196. synth_ai/cli/task_apps.py +1922 -190
  197. synth_ai/cli/traces.py +1 -1
  198. synth_ai/cli/tui.py +57 -0
  199. synth_ai/cli/turso.py +1 -1
  200. synth_ai/cli/watch.py +1 -1
  201. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +29 -17
  202. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  203. synth_ai/environments/examples/enron/engine.py +7 -2
  204. synth_ai/environments/examples/enron/environment.py +68 -0
  205. synth_ai/environments/examples/red/engine.py +27 -0
  206. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  207. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  208. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  209. synth_ai/environments/examples/red/environment.py +60 -0
  210. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  211. synth_ai/environments/examples/verilog/engine.py +104 -12
  212. synth_ai/evals/client.py +58 -61
  213. synth_ai/jobs/client.py +16 -4
  214. synth_ai/judge_schemas.py +9 -9
  215. synth_ai/py.typed +0 -0
  216. synth_ai/task/__init__.py +24 -5
  217. synth_ai/task/apps/__init__.py +1 -0
  218. synth_ai/task/config.py +257 -0
  219. synth_ai/task/contracts.py +138 -39
  220. synth_ai/task/proxy.py +48 -56
  221. synth_ai/task/rubrics/__init__.py +56 -0
  222. synth_ai/task/rubrics/loaders.py +152 -0
  223. synth_ai/task/rubrics/models.py +57 -0
  224. synth_ai/task/rubrics/scoring.py +116 -0
  225. synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
  226. synth_ai/task/server.py +8 -7
  227. synth_ai/task/trace_correlation_helpers.py +315 -0
  228. synth_ai/task/validators.py +413 -6
  229. synth_ai/tracing_v3/abstractions.py +3 -3
  230. synth_ai/tracing_v3/decorators.py +7 -3
  231. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  232. synth_ai/tracing_v3/replica_sync.py +4 -4
  233. synth_ai/tracing_v3/serialization.py +5 -5
  234. synth_ai/tracing_v3/session_tracer.py +16 -6
  235. synth_ai/tracing_v3/storage/base.py +29 -29
  236. synth_ai/tracing_v3/storage/config.py +3 -3
  237. synth_ai/tracing_v3/trace_utils.py +317 -0
  238. synth_ai/tracing_v3/turso/daemon.py +8 -7
  239. synth_ai/tracing_v3/turso/native_manager.py +66 -43
  240. synth_ai/tracing_v3/utils.py +3 -3
  241. synth_ai/tui/__init__.py +5 -0
  242. synth_ai/tui/__main__.py +13 -0
  243. synth_ai/tui/cli/__init__.py +1 -0
  244. synth_ai/tui/cli/query_experiments.py +164 -0
  245. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  246. synth_ai/tui/dashboard.py +906 -0
  247. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/METADATA +4 -1
  248. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/RECORD +278 -126
  249. examples/agora_ex/README_MoE.md +0 -224
  250. examples/agora_ex/__init__.py +0 -7
  251. examples/agora_ex/agora_ex.py +0 -65
  252. examples/agora_ex/agora_ex_task_app.py +0 -590
  253. examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
  254. examples/agora_ex/reward_fn_grpo-human.py +0 -129
  255. examples/agora_ex/system_prompt_CURRENT.md +0 -63
  256. examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
  257. examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
  258. examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
  259. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +0 -62
  260. synth_ai/rubrics/__init__.py +0 -22
  261. synth_ai/task/rubrics.py +0 -219
  262. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
  263. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
  264. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
  265. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
  266. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
  267. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
  268. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
  269. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
  270. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
  271. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
  272. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
  273. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
  274. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
  275. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
  276. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
  277. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
  278. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
  279. /examples/{rl/task_app → task_apps/math}/README.md +0 -0
  280. /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
  281. /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
  282. /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
  283. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
  284. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
  285. /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
  286. /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
  287. /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
  288. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/WHEEL +0 -0
  289. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/entry_points.txt +0 -0
  290. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/licenses/LICENSE +0 -0
  291. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,120 @@
1
+ """
2
+ Agent modules for Pokemon Emerald speedrunning agent
3
+ """
4
+
5
+ from utils.vlm import VLM
6
+ from .action import action_step
7
+ from .memory import memory_step
8
+ from .perception import perception_step
9
+ from .planning import planning_step
10
+ from .simple import SimpleAgent, get_simple_agent, simple_mode_processing_multiprocess, configure_simple_agent_defaults
11
+
12
+
13
class Agent:
    """
    Unified agent interface that encapsulates all agent logic.
    The client just calls agent.step(game_state) and gets back an action.
    """

    def __init__(self, args=None):
        """
        Initialize the agent based on configuration.

        Args:
            args: Command line arguments with agent configuration. The
                attributes ``backend``, ``model_name`` and ``simple`` are
                read; any that are missing (or ``args`` itself being falsy)
                fall back to "gemini", "gemini-2.5-flash" and False.
        """
        # Extract configuration. getattr() keeps a partially-populated args
        # object from raising AttributeError; defaults match the no-args case.
        backend = getattr(args, "backend", "gemini") if args else "gemini"
        model_name = getattr(args, "model_name", "gemini-2.5-flash") if args else "gemini-2.5-flash"
        simple_mode = getattr(args, "simple", False) if args else False

        # Initialize VLM
        self.vlm = VLM(backend=backend, model_name=model_name)
        print(f" VLM: {backend}/{model_name}")

        # Initialize agent mode
        self.simple_mode = simple_mode
        if simple_mode:
            # Use global SimpleAgent instance to enable checkpoint persistence
            self.simple_agent = get_simple_agent(self.vlm)
            print(" Mode: Simple (direct frame->action)")
        else:
            # Four-module agent context, carried across step() calls
            self.context = {
                'perception_output': None,
                'planning_output': None,
                'memory': []
            }
            print(" Mode: Four-module architecture")

    def step(self, game_state):
        """
        Process a game state and return an action.

        Args:
            game_state: Dictionary containing:
                - screenshot: PIL Image
                - game_state: Dict with game memory data
                - visual: Dict with visual observations
                - audio: Dict with audio observations
                - progress: Dict with milestone progress

        Returns:
            dict: Contains 'action' and optionally 'reasoning'.
            In four-module mode, None is returned if any module raises.
        """
        if self.simple_mode:
            # Simple mode - delegate to SimpleAgent
            return self.simple_agent.step(game_state)

        # Four-module processing
        try:
            # 1. Perception - understand what's happening
            perception_output = perception_step(
                self.vlm,
                game_state,
                self.context.get('memory', [])
            )
            self.context['perception_output'] = perception_output

            # 2. Planning - decide strategy
            planning_output = planning_step(
                self.vlm,
                perception_output,
                self.context.get('memory', [])
            )
            self.context['planning_output'] = planning_output

            # 3. Memory - update context
            memory_output = memory_step(
                perception_output,
                planning_output,
                self.context.get('memory', [])
            )
            self.context['memory'] = memory_output

            # 4. Action - choose button press
            # NOTE(review): action_step in agent/action.py is declared as
            # (memory_context, current_plan, latest_observation, frame,
            # state_data, recent_actions, vlm). This call passes four
            # positional arguments, so it appears to raise TypeError, which
            # the handler below turns into a None return. Confirm the
            # intended signatures before relying on this path.
            action_output = action_step(
                self.vlm,
                game_state,
                planning_output,
                perception_output
            )

            return action_output

        except Exception as e:
            # Best-effort: callers receive None on failure. The traceback is
            # printed as well so the failing module can be identified.
            import traceback

            print(f"❌ Agent error: {e}")
            traceback.print_exc()
            return None
108
+
109
+
110
# Public names re-exported by the agent package.
__all__ = [
    "Agent",
    "action_step",
    "memory_step",
    "perception_step",
    "planning_step",
    "SimpleAgent",
    "get_simple_agent",
    "simple_mode_processing_multiprocess",
    "configure_simple_agent_defaults",
]
@@ -0,0 +1,160 @@
1
+ import logging
2
+ import random
3
+ import sys
4
+ from agent.system_prompt import system_prompt
5
+ from utils.state_formatter import format_state_for_llm, format_state_summary, get_movement_options, get_party_health_summary
6
+ from utils.vlm import VLM
7
+
8
+ # Set up module logging
9
+ logger = logging.getLogger(__name__)
10
+
11
def action_step(memory_context, current_plan, latest_observation, frame, state_data, recent_actions, vlm):
    """
    Decide the next action button(s) based on memory, plan, observation, and comprehensive state.

    The function builds a text prompt, queries the VLM once, and parses the
    reply into button names. It does not press any buttons itself.

    Args:
        memory_context: Interpolated into the prompt as "Memory Context".
        current_plan: Interpolated into the prompt; 'No plan yet' is shown when falsy.
        latest_observation: Interpolated into the prompt as "Latest Observation".
        frame: Not used by this function; kept for interface compatibility.
        state_data: Dict passed to the state_formatter utilities; its 'game'
            entry is read for battle information.
        recent_actions: Iterable of recent actions (may be empty or None); the
            last five are listed in the prompt.
        vlm: Object exposing ``get_text_query(prompt, tag)``.

    Returns:
        A non-empty list of at most 10 button names as strings. When the reply
        contains no valid button, a state-dependent fallback is returned.
    """
    # Get formatted state context and useful summaries
    state_context = format_state_for_llm(state_data)
    state_summary = format_state_summary(state_data)
    movement_options = get_movement_options(state_data)
    party_health = get_party_health_summary(state_data)

    logger.info("[ACTION] Starting action decision")
    logger.info("[ACTION] State: %s", state_summary)
    logger.info(
        "[ACTION] Party health: %s/%s healthy",
        party_health['healthy_count'],
        party_health['total_count'],
    )
    if movement_options:
        logger.info("[ACTION] Movement options: %s", movement_options)

    # Extract key info for context
    game_data = state_data.get('game', {})

    context_str = "\n".join(
        _build_action_context(game_data, movement_options, party_health, recent_actions)
    )

    action_prompt = f"""
    ★★★ COMPREHENSIVE GAME STATE DATA ★★★

    {state_context}

    ★★★ ENHANCED ACTION CONTEXT ★★★

    {context_str}

    ★★★ ACTION DECISION TASK ★★★

    You are the agent playing Pokemon Emerald with a speedrunning mindset. Make quick, efficient decisions.

    Memory Context: {memory_context}
    Current Plan: {current_plan if current_plan else 'No plan yet'}
    Latest Observation: {latest_observation}

    Based on the comprehensive state information above, decide your next action(s):

    BATTLE STRATEGY:
    - If in battle: Choose moves strategically based on type effectiveness and damage
    - Consider switching pokemon if current one is weak/low HP
    - Use items if pokemon is in critical condition

    NAVIGATION STRATEGY:
    - Use movement options analysis above for efficient navigation
    - Avoid blocked tiles (marked as BLOCKED)
    - Consider tall grass: avoid if party is weak, seek if need to train/catch
    - Navigate around water unless you have Surf
    - Use coordinates to track progress toward objectives

    MENU/DIALOGUE STRATEGY:
    - If in dialogue: A to advance text, B to cancel/skip if possible
    - If in menu: Navigate with UP/DOWN/LEFT/RIGHT, A to select, B to cancel/back out
    - If stuck in menu/interface: B repeatedly to exit to overworld
    - In Pokemon Center: A to talk to Nurse Joy, A to confirm healing

    HEALTH MANAGEMENT:
    - If pokemon are low HP/fainted, head to Pokemon Center
    - If no healthy pokemon, prioritize healing immediately
    - Consider terrain: avoid wild encounters if party is weak

    EFFICIENCY RULES:
    1. Output sequences of actions when you know what's coming (e.g., "RIGHT, RIGHT, RIGHT, A" to enter a door)
    2. For dialogue: "A, A, A, A, A" to mash through
    3. For movement: repeat directions based on movement options (e.g., "UP, UP, UP, UP" if UP shows "Normal path")
    4. If uncertain, output single action and reassess
    5. Use traversability data: move toward open paths, avoid obstacles
    6. If movement doesn't change coordinates (e.g., RIGHT but X doesn't increase), check map for walls (#) blocking your path

    Valid buttons: A, B, SELECT, START, UP, DOWN, LEFT, RIGHT, L, R
    - A: Interact with NPCs/objects, confirm selections, advance dialogue, use moves in battle
    - B: Cancel menus, back out of interfaces, run faster (with running shoes), flee from battle
    - START: Open main menu (Title sequence, Pokedex, Pokemon, Bag, etc.)
    - SELECT: Use registered key item (typically unused)
    - UP/DOWN/LEFT/RIGHT: Move character, navigate menus, select options
    - L/R: Cycle through pages in some menus, switch Pokemon in battle (rare usage)

    ⚠️ CRITICAL WARNING: NEVER save the game using the in-game save menu! Saving will crash the entire run and end your progress. If you encounter a save prompt in the game, press B to cancel it immediately!

    Return ONLY the button name(s) as a comma-separated list, nothing else.
    Maximum 10 actions in sequence. Avoid repeating same button more than 6 times.
    """

    # Construct complete prompt for VLM
    complete_prompt = system_prompt + action_prompt

    parsed = _parse_action_response(vlm.get_text_query(complete_prompt, "ACTION"))
    print(f"Parsed actions: {parsed}")

    # Limit to maximum 10 actions. Repetition is only discouraged via the
    # prompt; it is not enforced here.
    actions = parsed[:_MAX_ACTIONS]

    # If no valid actions found, make an intelligent default based on state
    if not actions:
        actions = _fallback_actions(game_data, party_health)
        print(f"❌ No valid actions parsed - using fallback {actions}")
    print("-" * 80 + "\n")

    logger.info("[ACTION] Actions decided: %s", ', '.join(actions))
    return actions


# Buttons the emulator accepts; anything else in the model reply is dropped.
_VALID_BUTTONS = ('A', 'B', 'SELECT', 'START', 'UP', 'DOWN', 'LEFT', 'RIGHT', 'L', 'R')
_MAX_ACTIONS = 10


def _build_action_context(game_data, movement_options, party_health, recent_actions):
    """Return the prompt lines describing battle/overworld state, party health and recent actions."""
    lines = []

    # Battle vs Overworld context
    if game_data.get('in_battle', False):
        lines.append("=== BATTLE MODE ===")
        battle_info = game_data.get('battle_info', {})
        if battle_info:
            for key, label in (('player_pokemon', 'Your Pokemon'), ('opponent_pokemon', 'Opponent')):
                if key in battle_info:
                    pkmn = battle_info[key]
                    name = pkmn.get('species_name', pkmn.get('species', 'Unknown'))
                    lines.append(
                        f"{label}: {name} (Lv.{pkmn.get('level', '?')}) "
                        f"HP: {pkmn.get('current_hp', '?')}/{pkmn.get('max_hp', '?')}"
                    )
    else:
        lines.append("=== OVERWORLD MODE ===")

        # Movement options from utility
        if movement_options:
            lines.append("Movement Options:")
            for direction, description in movement_options.items():
                lines.append(f" {direction}: {description}")

    # Party health summary
    if party_health['total_count'] > 0:
        lines.append("=== PARTY STATUS ===")
        lines.append(f"Healthy Pokemon: {party_health['healthy_count']}/{party_health['total_count']}")
        if party_health['critical_pokemon']:
            lines.append("Critical Pokemon:")
            for critical in party_health['critical_pokemon']:
                lines.append(f" {critical}")

    # Recent actions context; str() guards against non-string history entries
    if recent_actions:
        recent = ', '.join(str(action) for action in list(recent_actions)[-5:])
        lines.append(f"Recent Actions: {recent}")

    return lines


def _parse_action_response(response):
    """Return the valid button names found in a comma- or newline-separated model reply."""
    if not response:
        return []
    # Treat newlines like commas so "UP\nUP\nA" parses the same as "UP, UP, A".
    tokens = str(response).upper().replace("\n", ",").split(",")
    return [token.strip() for token in tokens if token.strip() in _VALID_BUTTONS]


def _fallback_actions(game_data, party_health):
    """Choose a default action list when the model reply contained no valid button."""
    if game_data.get('in_battle', False):
        return ['A']  # Attack in battle
    if party_health['total_count'] == 0:
        return ['A', 'A', 'A']  # Try to progress dialogue/menu
    return [random.choice(['A', 'RIGHT', 'UP', 'DOWN', 'LEFT'])]  # Random exploration
@@ -0,0 +1,155 @@
1
+ import logging
2
+ from collections import deque
3
+ from agent.system_prompt import system_prompt
4
+ from utils.state_formatter import format_state_summary, get_party_health_summary
5
+ from utils.vlm import VLM
6
+
7
+ # Set up module logging
8
+ logger = logging.getLogger(__name__)
9
+
10
def extract_key_state_info(state_data):
    """Condense a comprehensive game state into the compact dict kept in memory.

    The summary text and party health figures come from the shared
    state-formatter utilities so memory entries stay consistent with them.

    Args:
        state_data: Comprehensive state dict. Its 'player', 'game' and 'map'
            sections are read; each may be absent.

    Returns:
        A dict with the state summary, player name, money, current map,
        battle flag, party health as "healthy/total", critical pokemon,
        position, a traversability summary and, when battle info is present,
        the opponent name under 'battle_opponent'.
    """
    summary_text = format_state_summary(state_data)
    health = get_party_health_summary(state_data)

    player = state_data.get('player', {})
    game = state_data.get('game', {})
    map_section = state_data.get('map', {})

    # Player-level money wins; a missing or falsy value falls back to the game section.
    money = player.get('money')
    if not money:
        money = game.get('money', 0)

    key_info = {
        'state_summary': summary_text,
        'player_name': player.get('name', 'Player'),
        'money': money,
        'current_map': player.get('location', 'Unknown Location'),
        'in_battle': game.get('in_battle', False),
        'party_health': f"{health['healthy_count']}/{health['total_count']}",
        'critical_pokemon': health['critical_pokemon'],
    }

    # Position: prefer 'coordinates', then 'position', otherwise an empty dict.
    for field in ('coordinates', 'position'):
        if field in player:
            key_info['position'] = player[field]
            break
    else:
        key_info['position'] = {}

    # Opponent name is only recorded while battle info is available.
    battle = game.get('battle_info')
    if battle:
        opponent = battle.get('opponent_pokemon', {})
        fallback_name = opponent.get('species', 'Unknown Pokemon')
        key_info['battle_opponent'] = opponent.get('species_name', fallback_name)

    # Traversability: cells whose string form is '0' are counted as blocked.
    grid = map_section.get('traversability')
    if grid:
        total_tiles = 0
        blocked_tiles = 0
        for row in grid:
            total_tiles += len(row)
            blocked_tiles += sum(1 for cell in row if str(cell) == '0')
        passable_tiles = total_tiles - blocked_tiles
        key_info['traversability_summary'] = f"{passable_tiles}/{total_tiles} passable"
    else:
        key_info['traversability_summary'] = "No data"

    return key_info
56
+
57
def memory_step(memory_context, current_plan, recent_actions, observation_buffer, vlm):
    """Update the rolling memory buffer and rebuild the memory context text.

    New observations (with their extracted key state) and recent actions are
    appended to a 50-entry deque stored as an attribute on this function, so
    the buffer persists across calls. The whole buffer is then re-scanned to
    derive key events (map changes, battle start/end) and rendered to text.

    Args:
        memory_context: Previous memory context. It is not read; the value is
            rebuilt from the buffer on every call.
        current_plan: Current plan text, or a falsy value when there is none.
        recent_actions: Iterable of actions to record in the buffer.
        observation_buffer: Sequence of observation dicts providing
            'frame_id' and 'observation', plus an optional 'state'.
        vlm: Not used by this step; kept so the step signature stays uniform.

    Returns:
        The formatted memory context string.
    """
    # The buffer lives on the function object so it survives between calls.
    if not hasattr(memory_step, 'memory_buffer'):
        memory_step.memory_buffer = deque(maxlen=50)

    logger.info(f"[MEMORY] Processing {len(observation_buffer)} new observations")

    # Add new observations with state info to the buffer
    latest_state = None
    for obs in observation_buffer:
        state_info = extract_key_state_info(obs.get('state', {}))
        memory_step.memory_buffer.append({
            "type": "observation",
            "frame_id": obs["frame_id"],
            "content": obs["observation"],
            "state": state_info
        })
        logger.info(f"[MEMORY] Added observation frame {obs['frame_id']}: {state_info['state_summary']}")
        # Remember the newest extracted state so it is not recomputed below.
        latest_state = state_info

    # Add recent actions to the buffer
    for action in recent_actions:
        memory_step.memory_buffer.append({
            "type": "action",
            "content": action
        })

    memory_entries = []
    key_events = []

    # Both trackers stay None until the first buffered observation is seen.
    previous_map = None
    previous_battle_state = None

    for entry in memory_step.memory_buffer:
        if entry["type"] != "observation":
            memory_entries.append(f"Action: {entry['content']}")
            continue

        frame_id = entry['frame_id']
        description = entry['content']
        state = entry.get('state', {})
        state_summary = state.get('state_summary', '')

        current_map = state.get('current_map', 'Unknown Location')
        current_battle = state.get('in_battle', False)

        if previous_map is not None and current_map != previous_map:
            key_events.append(f"Moved from {previous_map} to {current_map}")
            logger.info(f"[MEMORY] Key event: Map change from {previous_map} to {current_map}")

        if current_battle and not previous_battle_state:
            opponent = state.get('battle_opponent', 'Unknown Pokemon')
            key_events.append(f"Entered battle vs {opponent}")
            logger.info(f"[MEMORY] Key event: Entered battle vs {opponent}")
        elif previous_battle_state and not current_battle:
            # Only report an exit when a battle was actually observed before;
            # the unknown initial state must not count as "was in battle".
            key_events.append("Exited battle")
            logger.info("[MEMORY] Key event: Exited battle")

        previous_map = current_map
        previous_battle_state = current_battle

        # Observations may be plain text or a dict carrying a 'description'.
        if isinstance(description, dict):
            desc_text = description.get('description', str(description))
        else:
            desc_text = str(description)

        memory_entries.append(f"Frame {frame_id}: {desc_text} [{state_summary}]")

    # Current state summary comes from the latest observation of this call.
    current_state_summary = ""
    if latest_state is not None:
        current_state_summary = latest_state.get('state_summary', 'No state data')

    events_text = ' -> '.join(key_events[-5:]) if key_events else 'None recently'

    # The buffer holds up to 50 entries but only the last 30 are rendered to
    # keep the prompt short; the section label text is left unchanged.
    memory_context = "\n".join([
        "★★★ COMPREHENSIVE MEMORY CONTEXT ★★★",
        "",
        f"CURRENT STATE: {current_state_summary}",
        "",
        f"CURRENT PLAN: {current_plan if current_plan else 'No plan yet'}",
        "",
        f"KEY EVENTS: {events_text}",
        "",
        "RECENT MEMORY (last 50 entries):",
        "\n".join(memory_entries[-30:]),
    ])

    logger.info(f"[MEMORY] Memory context updated with {len(memory_entries)} total entries")
    logger.info(f"[MEMORY] Current state: {current_state_summary}")
    logger.info(f"[MEMORY] Key events: {len(key_events)} tracked")

    return memory_context
@@ -0,0 +1,69 @@
1
+ import time
2
+ import logging
3
+ from utils.vlm import VLM
4
+ from utils.state_formatter import format_state_for_llm, format_state_summary
5
+ from agent.system_prompt import system_prompt
6
+
7
+ # Set up module logging
8
+ logger = logging.getLogger(__name__)
9
+
10
def perception_step(frame, state_data, vlm):
    """Describe the current frame and decide whether slow thinking is needed.

    Two VLM queries are issued against the same frame: one asks for a detailed
    description of the situation, the other is a YES/NO check for a
    significant scene change.

    Args:
        frame: Current game frame, passed through to ``vlm.get_query``.
        state_data: Comprehensive game state, formatted via the state utilities.
        vlm: Object exposing ``get_query(frame, prompt, tag)``.

    Returns:
        ``(observation, slow_thinking_needed)`` where ``observation`` is a dict
        with 'description' (the VLM answer) and 'state_data' (the formatted
        state context), and ``slow_thinking_needed`` is True when the scene
        check answer contains "yes".
    """
    state_context = format_state_for_llm(state_data)
    state_summary = format_state_summary(state_data)

    logger.info("[PERCEPTION] Processing frame with comprehensive state data")
    logger.info(f"[PERCEPTION] State: {state_summary}")
    logger.info(f"[PERCEPTION] State context length: {len(state_context)} characters")

    # Both prompts open with the same state section, so build it once.
    state_header = (
        "\n"
        "★★★ COMPREHENSIVE GAME STATE DATA ★★★\n"
        "\n"
        f"{state_context}\n"
        "\n"
    )

    perception_prompt = state_header + (
        "★★★ VISUAL ANALYSIS TASK ★★★\n"
        "\n"
        "You are the agent, actively playing Pokemon Emerald. Observe and describe your current situation in detail using both the visual frame and the comprehensive game state data above.\n"
        "\n"
        "Based on the visual frame and the above state data, describe your current situation:\n"
        "- CUTSCENE or TITLE SCREEN: What does the cutscene or title screen show?\n"
        "- MAP: You are navigating a terrain (city, forest, grassland, etc.). Are there any interactable locations (NPCs, items, doors)? What are the traversable vs. non-traversable areas? Use your position coordinates to understand where you are.\n"
        "- BATTLE: Analyze the battle situation using both visual and state data. What moves are available? What's the strategy?\n"
        "- DIALOGUE: What is the character telling you? How important is this information? Can you respond to the NPC?\n"
        "- MENU: What menu are you in? What options are available? What should you select based on your current needs?\n"
        "\n"
        "Combine visual observation with the state data to give a complete picture of the current situation.\n"
    )

    description = vlm.get_query(frame, system_prompt + perception_prompt, "PERCEPTION")

    # The scene check is sent without the system prompt.
    scene_check_prompt = state_header + (
        "★★★ SLOW THINKING DECISION ★★★\n"
        "\n"
        "Based on the current state and visual frame above:\n"
        "\n"
        "Does this scene represent a significant change that requires planning? Consider:\n"
        "- Entering/exiting battle\n"
        "- Reaching a new map/location\n"
        "- Encountering important NPCs or story events\n"
        "- Significant changes in pokemon party or game state\n"
        "\n"
        "Answer YES or NO.\n"
    )
    raw_answer = vlm.get_query(frame, scene_check_prompt, "PERCEPTION-SCENE_CHECK")
    scene_response = raw_answer.strip().lower()
    slow_thinking_needed = "yes" in scene_response

    observation = {"description": description, "state_data": state_context}

    logger.info(f"[PERCEPTION] Slow thinking needed: {slow_thinking_needed}")
    return observation, slow_thinking_needed
@@ -0,0 +1,96 @@
1
+ import logging
2
+ from utils.vlm import VLM
3
+ from utils.state_formatter import format_state_for_llm, format_state_summary
4
+ from agent.system_prompt import system_prompt
5
+
6
+ # Set up module logging
7
+ logger = logging.getLogger(__name__)
8
+
9
def planning_step(memory_context, current_plan, slow_thinking_needed, state_data, vlm):
    """Assess the current high-level plan and create a new one when required.

    An existing plan is first checked against the current state via the VLM
    and dropped if the answer contains "yes". A new plan is requested when
    there is no usable plan or when slow thinking has been requested.

    Args:
        memory_context: Memory context text embedded in both prompts.
        current_plan: Existing plan text, or a falsy value when there is none.
        slow_thinking_needed: When true, a new plan is always generated.
        state_data: Comprehensive game state, formatted via the state utilities.
        vlm: Object exposing ``get_text_query(prompt, tag)``.

    Returns:
        The plan to follow: the unchanged current plan, or the newly
        generated one.
    """
    state_context = format_state_for_llm(state_data)
    state_summary = format_state_summary(state_data)

    logger.info("[PLANNING] Starting planning step")
    logger.info(f"[PLANNING] State: {state_summary}")
    logger.info(f"[PLANNING] Slow thinking needed: {slow_thinking_needed}")

    # Both prompts open with the same state section, so build it once.
    state_header = (
        "\n"
        "★★★ COMPREHENSIVE GAME STATE DATA ★★★\n"
        "\n"
        f"{state_context}\n"
        "\n"
    )

    # When slow thinking is requested a replan happens regardless, so the
    # assessment answer could not change the result; skip that VLM call.
    if current_plan and not slow_thinking_needed:
        plan_check_prompt = state_header + (
            "★★★ PLAN ASSESSMENT TASK ★★★\n"
            "\n"
            "You are the agent playing Pokemon Emerald. Assess your current situation and plan progress.\n"
            "\n"
            f"Current Plan: {current_plan}\n"
            f"Memory Context: {memory_context}\n"
            "\n"
            "Considering your current location, pokemon party, money, traversability, and recent actions:\n"
            "Have you accomplished your current plan? Answer YES or NO, and explain briefly.\n"
            "\n"
            "Consider these factors:\n"
            "- Did you reach your target location?\n"
            "- Did you complete the intended battle/gym challenge?\n"
            "- Did you acquire the needed pokemon/items?\n"
            "- Are you stuck due to terrain or party status?\n"
            "- Do you need to adapt due to wild encounters or water obstacles?\n"
        )
        plan_status = vlm.get_text_query(system_prompt + plan_check_prompt, "PLANNING-ASSESSMENT")
        if "yes" in plan_status.lower():
            current_plan = None
            logger.info("[PLANNING] Current plan marked as completed")

    # Any falsy plan (None or an empty string) counts as "no plan", so an
    # empty plan is regenerated instead of being returned as-is.
    if not current_plan or slow_thinking_needed:
        planning_prompt = state_header + (
            "★★★ STRATEGIC PLANNING TASK ★★★\n"
            "\n"
            "You are the agent playing Pokemon Emerald with a speedrunning mindset. Create an efficient strategic plan.\n"
            "\n"
            f"Memory Context: {memory_context}\n"
            "\n"
            "Analyze your situation and create a strategic plan:\n"
            "\n"
            "1. IMMEDIATE GOAL: What should you focus on right now? Consider:\n"
            "- If in battle: What's your battle strategy based on pokemon HP/levels?\n"
            "- If on map: Navigate efficiently using traversability data\n"
            "- If in menu/dialogue: How to progress efficiently?\n"
            "- Do you need to heal pokemon at Pokemon Center?\n"
            "- Are there terrain obstacles (water, blocked paths) to navigate?\n"
            "\n"
            "2. SHORT-TERM OBJECTIVES (next few actions):\n"
            "- Specific steps to achieve your immediate goal\n"
            "- Account for your current pokemon party health and levels\n"
            "- Consider terrain: avoid/seek tall grass, navigate around obstacles\n"
            "- Money management for items/healing\n"
            "\n"
            "3. LONG-TERM STRATEGY:\n"
            "- How does this fit into beating the game quickly?\n"
            "- What gym leader or major milestone to target next?\n"
            "- Pokemon catching/training priorities based on current party\n"
            "- Route optimization considering terrain types\n"
            "\n"
            "4. EFFICIENCY NOTES:\n"
            "- How to minimize backtracking using map layout\n"
            "- Shortcuts or sequence breaks considering terrain\n"
            "- Wild encounter management (avoid/seek based on needs)\n"
            "\n"
            "Format as a clear, actionable plan focusing on speed and efficiency.\n"
        )
        current_plan = vlm.get_text_query(system_prompt + planning_prompt, "PLANNING-CREATION")
        logger.info("[PLANNING] New plan created")

    # Log at most 300 characters; tolerate a missing plan rather than raising.
    plan_text = "" if current_plan is None else str(current_plan)
    preview = f"{plan_text[:300]}..." if len(plan_text) > 300 else plan_text
    logger.info(f"[PLANNING] Final plan: {preview}")
    return current_plan