synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (291) hide show
  1. examples/multi_step/configs/README_verilog_rl.md +77 -0
  2. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  3. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  4. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  5. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  6. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +17 -5
  7. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  8. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  9. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  10. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  11. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  12. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  13. examples/multi_step/readme.md +48 -0
  14. examples/multi_step/verilog_rl_lora.md +218 -0
  15. examples/qwen_coder/configs/coder_lora_30b.toml +1 -1
  16. examples/sft/evaluate.py +2 -0
  17. examples/sft/generate_traces.py +2 -0
  18. examples/swe/task_app/grpo_swe_mini.py +56 -26
  19. examples/swe/task_app/hosted/rollout.py +42 -0
  20. examples/swe/task_app/hosted/test_service.py +5 -6
  21. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  22. examples/task_apps/TESTING.md +275 -0
  23. examples/task_apps/__init__.py +0 -0
  24. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  25. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  26. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  27. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  28. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  29. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  30. examples/task_apps/crafter/__init__.py +0 -0
  31. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  32. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  33. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  34. examples/task_apps/crafter/task_app/__init__.py +5 -0
  35. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +324 -21
  36. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
  37. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  38. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +76 -7
  39. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  40. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +25 -3
  41. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +77 -4
  42. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +117 -9
  43. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
  44. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +218 -0
  45. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  46. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  47. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  48. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  49. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  50. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  51. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  52. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  53. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  54. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  55. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  56. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  57. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  58. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  59. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  60. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  61. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  62. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  63. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  64. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  65. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  66. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  67. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  68. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  69. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  70. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  71. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  72. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  73. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  74. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  75. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  76. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  77. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  78. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  79. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  80. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  81. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  82. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  83. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  84. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  85. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  86. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  87. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  88. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  89. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  90. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  91. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  92. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  93. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  94. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  95. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  96. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  97. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  98. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  99. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  100. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  101. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  102. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  103. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  104. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  105. examples/task_apps/enron/__init__.py +1 -0
  106. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  107. examples/task_apps/enron/filter_sft.toml +5 -0
  108. examples/task_apps/enron/task_app/README.md +14 -0
  109. examples/task_apps/enron/task_app/__init__.py +1 -0
  110. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  111. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  112. examples/task_apps/enron/tests/__init__.py +4 -0
  113. examples/task_apps/enron/tests/conftest.py +115 -0
  114. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  115. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  116. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  117. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  118. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  119. examples/task_apps/math/__init__.py +0 -0
  120. examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
  121. examples/task_apps/pokemon_battle/__init__.py +2 -0
  122. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  123. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  124. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  125. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  126. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  127. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  128. examples/task_apps/pokemon_red/README.md +357 -0
  129. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  130. examples/task_apps/pokemon_red/__init__.py +3 -0
  131. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  132. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
  133. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  134. examples/task_apps/pokemon_red/task_app.py +799 -0
  135. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  136. examples/task_apps/sokoban/README.md +307 -0
  137. examples/task_apps/sokoban/__init__.py +3 -0
  138. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  139. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  140. examples/task_apps/sokoban/filter_sft.toml +5 -0
  141. examples/task_apps/sokoban/task_app.py +1058 -0
  142. examples/task_apps/sokoban/tests/__init__.py +4 -0
  143. examples/task_apps/sokoban/tests/conftest.py +113 -0
  144. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  145. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  146. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  147. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  148. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  149. examples/task_apps/verilog/__init__.py +1 -0
  150. examples/task_apps/verilog/eval_groq_qwen32b.toml +24 -0
  151. examples/task_apps/verilog/filter_sft.toml +5 -0
  152. examples/task_apps/verilog/task_app/README.md +12 -0
  153. examples/task_apps/verilog/task_app/__init__.py +1 -0
  154. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  155. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  156. examples/task_apps/verilog/tests/__init__.py +4 -0
  157. examples/task_apps/verilog/tests/conftest.py +115 -0
  158. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  159. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  160. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  161. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  162. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  163. examples/vlm/crafter_openai_vlm_agent.py +4 -4
  164. examples/vlm/run_crafter_vlm_benchmark.py +4 -4
  165. examples/warming_up_to_rl/groq_test.py +2 -0
  166. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  167. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  168. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  169. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  170. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  171. examples/workflows/__init__.py +0 -0
  172. examples/workflows/math_rl/__init__.py +0 -0
  173. examples/workflows/math_rl/download_dataset.py +80 -0
  174. synth_ai/__init__.py +2 -2
  175. synth_ai/api/models/supported.py +1 -0
  176. synth_ai/api/train/builders.py +25 -11
  177. synth_ai/api/train/cli.py +12 -6
  178. synth_ai/api/train/configs/__init__.py +10 -10
  179. synth_ai/api/train/configs/rl.py +5 -4
  180. synth_ai/api/train/configs/sft.py +4 -3
  181. synth_ai/api/train/env_resolver.py +5 -2
  182. synth_ai/api/train/supported_algos.py +10 -5
  183. synth_ai/api/train/utils.py +7 -4
  184. synth_ai/cli/__init__.py +48 -59
  185. synth_ai/cli/_modal_wrapper.py +3 -2
  186. synth_ai/cli/_storage.py +4 -3
  187. synth_ai/cli/_validate_task_app.py +11 -0
  188. synth_ai/cli/balance.py +4 -3
  189. synth_ai/cli/calc.py +2 -2
  190. synth_ai/cli/demo.py +14 -7
  191. synth_ai/cli/legacy_root_backup.py +1 -1
  192. synth_ai/cli/recent.py +1 -1
  193. synth_ai/cli/rl_demo.py +8 -7
  194. synth_ai/cli/root.py +0 -97
  195. synth_ai/cli/status.py +1 -1
  196. synth_ai/cli/task_apps.py +1922 -190
  197. synth_ai/cli/traces.py +1 -1
  198. synth_ai/cli/tui.py +57 -0
  199. synth_ai/cli/turso.py +1 -1
  200. synth_ai/cli/watch.py +1 -1
  201. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +29 -17
  202. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  203. synth_ai/environments/examples/enron/engine.py +7 -2
  204. synth_ai/environments/examples/enron/environment.py +68 -0
  205. synth_ai/environments/examples/red/engine.py +27 -0
  206. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  207. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  208. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  209. synth_ai/environments/examples/red/environment.py +60 -0
  210. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  211. synth_ai/environments/examples/verilog/engine.py +104 -12
  212. synth_ai/evals/client.py +58 -61
  213. synth_ai/jobs/client.py +16 -4
  214. synth_ai/judge_schemas.py +9 -9
  215. synth_ai/py.typed +0 -0
  216. synth_ai/task/__init__.py +24 -5
  217. synth_ai/task/apps/__init__.py +1 -0
  218. synth_ai/task/config.py +257 -0
  219. synth_ai/task/contracts.py +138 -39
  220. synth_ai/task/proxy.py +48 -56
  221. synth_ai/task/rubrics/__init__.py +56 -0
  222. synth_ai/task/rubrics/loaders.py +152 -0
  223. synth_ai/task/rubrics/models.py +57 -0
  224. synth_ai/task/rubrics/scoring.py +116 -0
  225. synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
  226. synth_ai/task/server.py +8 -7
  227. synth_ai/task/trace_correlation_helpers.py +315 -0
  228. synth_ai/task/validators.py +413 -6
  229. synth_ai/tracing_v3/abstractions.py +3 -3
  230. synth_ai/tracing_v3/decorators.py +7 -3
  231. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  232. synth_ai/tracing_v3/replica_sync.py +4 -4
  233. synth_ai/tracing_v3/serialization.py +5 -5
  234. synth_ai/tracing_v3/session_tracer.py +16 -6
  235. synth_ai/tracing_v3/storage/base.py +29 -29
  236. synth_ai/tracing_v3/storage/config.py +3 -3
  237. synth_ai/tracing_v3/trace_utils.py +317 -0
  238. synth_ai/tracing_v3/turso/daemon.py +8 -7
  239. synth_ai/tracing_v3/turso/native_manager.py +66 -43
  240. synth_ai/tracing_v3/utils.py +3 -3
  241. synth_ai/tui/__init__.py +5 -0
  242. synth_ai/tui/__main__.py +13 -0
  243. synth_ai/tui/cli/__init__.py +1 -0
  244. synth_ai/tui/cli/query_experiments.py +164 -0
  245. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  246. synth_ai/tui/dashboard.py +906 -0
  247. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/METADATA +4 -1
  248. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/RECORD +278 -126
  249. examples/agora_ex/README_MoE.md +0 -224
  250. examples/agora_ex/__init__.py +0 -7
  251. examples/agora_ex/agora_ex.py +0 -65
  252. examples/agora_ex/agora_ex_task_app.py +0 -590
  253. examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
  254. examples/agora_ex/reward_fn_grpo-human.py +0 -129
  255. examples/agora_ex/system_prompt_CURRENT.md +0 -63
  256. examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
  257. examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
  258. examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
  259. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +0 -62
  260. synth_ai/rubrics/__init__.py +0 -22
  261. synth_ai/task/rubrics.py +0 -219
  262. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
  263. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
  264. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
  265. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
  266. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
  267. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
  268. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
  269. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
  270. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
  271. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
  272. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
  273. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
  274. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
  275. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
  276. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
  277. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
  278. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
  279. /examples/{rl/task_app → task_apps/math}/README.md +0 -0
  280. /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
  281. /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
  282. /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
  283. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
  284. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
  285. /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
  286. /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
  287. /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
  288. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/WHEEL +0 -0
  289. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/entry_points.txt +0 -0
  290. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/licenses/LICENSE +0 -0
  291. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,357 @@
1
+ # Pokémon Red Task App
2
+
3
+ A reinforcement learning environment for Pokémon Red using PyBoy emulation with VLM support.
4
+
5
+ ## Features
6
+
7
+ - **Full Game Boy Emulation**: Uses PyBoy to run authentic Pokémon Red ROM
8
+ - **VLM Support**: Base64-encoded PNG frames for vision models (GPT-4V, Qwen-VL, etc.)
9
+ - **Policy Proxy**: OpenAI/Groq API integration for LLM-driven gameplay
10
+ - **Rich State Extraction**: Comprehensive game state from RAM (HP, position, party, battle data)
11
+ - **Reward Shaping**: Ultra-dense reward functions for RL training
12
+ - **Instant Start**: Pre-configured init state skips intro (starts in Red's bedroom)
13
+
14
+ ## Quick Start
15
+
16
+ ### 1. Start the Task App Server
17
+
18
+ ```bash
19
+ # From synth-ai root
20
+ uv run -m synth_ai task-app serve pokemon_red --port 8913
21
+ ```
22
+
23
+ ### 2. Run a Random Rollout
24
+
25
+ ```python
26
+ import httpx
27
+ import asyncio
28
+
29
+ async def test_rollout():
30
+     async with httpx.AsyncClient(timeout=60.0) as client:
31
+         response = await client.post(
32
+             "http://127.0.0.1:8913/rollout",
33
+             json={
34
+                 "ops": [
35
+                     {"button": "DOWN", "frames": 10},
36
+                     {"button": "A", "frames": 20},
37
+                     {"button": "RIGHT", "frames": 15},
38
+                 ],
39
+                 "policy": {"config": {}},
40
+             },
41
+         )
42
+         result = response.json()
43
+         print(f"Steps: {len(result['steps'])}")
44
+
45
+ asyncio.run(test_rollout())
46
+ ```
47
+
48
+ ### 3. Run with VLM Policy
49
+
50
+ ```bash
51
+ # Using Qwen-VL via Groq
52
+ uv run python examples/task_apps/pokemon_red/test_pallet_town_rewards.py
53
+ ```
54
+
55
+ ## Reward Functions
56
+
57
+ ### Pallet Town Progression (Recommended for Beginners)
58
+
59
+ **Location**: `synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py`
60
+
61
+ Ultra-rich reward shaping for the opening sequence:
62
+
63
+ | Milestone | Reward | Description |
64
+ |-----------|--------|-------------|
65
+ | Leave bedroom | +20 | Go downstairs |
66
+ | Exit house | +30 | Enter Pallet Town |
67
+ | Find Oak's lab | +40 | Discover and enter lab |
68
+ | Talk to Oak | +50 | First dialogue |
69
+ | Get starter | +100 | Receive your first Pokémon |
70
+ | Enter battle | +75 | Start rival battle |
71
+ | Deal damage | +50 | Attack rival (10×5) |
72
+ | Half HP | +25 | Reduce enemy to <50% HP |
73
+ | Low HP | +35 | Reduce enemy to <25% HP |
74
+ | Win battle | +150 | Defeat rival |
75
+ | Exit lab | +60 | Leave with Pokémon |
76
+ | **Efficiency bonuses** | +100 | Fast navigation, healthy Pokémon |
77
+
78
+ **Total: ~600-700 points for a typical full run (the rows above sum to a maximum of 735)**
79
+
80
+ See [`PALLET_TOWN_REWARDS.md`](../../../synth_ai/environments/examples/red/engine_helpers/reward_library/PALLET_TOWN_REWARDS.md) for full documentation.
81
+
82
+ ### Usage in Training
83
+
84
+ ```toml
85
+ # pallet_town_rl_config.toml
86
+ [reward]
87
+ reward_type = "composite"
88
+ reward_class = "synth_ai.environments.examples.red.engine_helpers.reward_library.pallet_town_progression.PalletTownProgressionCompositeReward"
89
+
90
+ [training]
91
+ algorithm = "ppo"
92
+ max_steps_per_episode = 500
93
+ num_episodes = 1000
94
+ ```
95
+
96
+ ## State Schema
97
+
98
+ The environment exposes comprehensive game state:
99
+
100
+ ```python
101
+ {
102
+ # Position
103
+ "map_id": int, # Current location
104
+ "player_x": int,
105
+ "player_y": int,
106
+
107
+ # Party
108
+ "party_count": int,
109
+ "party_pokemon": [
110
+ {
111
+ "species_id": int,
112
+ "level": int,
113
+ "hp_current": int,
114
+ "hp_max": int,
115
+ "hp_percentage": float,
116
+ "xp": int,
117
+ }
118
+ ],
119
+
120
+ # Battle
121
+ "in_battle": bool,
122
+ "battle_outcome": int, # 0=ongoing, 1=win, 2=lose
123
+ "enemy_hp_current": int,
124
+ "enemy_hp_max": int,
125
+ "enemy_hp_percentage": float,
126
+ "enemy_level": int,
127
+ "enemy_species_id": int,
128
+ "battle_turn": int,
129
+
130
+ # Dialogue & UI
131
+ "text_box_active": bool,
132
+ "menu_state": int,
133
+
134
+ # Progress
135
+ "badges": int, # Bitfield of earned badges
136
+ "money": int,
137
+
138
+ # VLM Support
139
+ "observation_image_base64": str, # PNG frame for vision models
140
+ }
141
+ ```
142
+
143
+ ## Action Space
144
+
145
+ ### Button Actions
146
+
147
+ ```python
148
+ {
149
+ "button": "A" | "B" | "START" | "SELECT" | "UP" | "DOWN" | "LEFT" | "RIGHT",
150
+ "frames": int, # How long to hold the button (60fps)
151
+ }
152
+ ```
153
+
154
+ ### Policy-Driven Actions
155
+
156
+ When using LLM policies, the task app proxies requests to OpenAI/Groq:
157
+
158
+ ```python
159
+ {
160
+ "policy": {
161
+ "config": {
162
+ "model": "gpt-4-turbo",
163
+ "api_key": "...",
164
+ # or for Groq:
165
+ # "model": "qwen-2.5-7b",
166
+ # "base_url": "https://api.groq.com/openai/v1",
167
+ }
168
+ }
169
+ }
170
+ ```
171
+
172
+ ## Files
173
+
174
+ - **`task_app.py`**: Main task app entry point
175
+ - **`pallet_town_rl_config.toml`**: Training config for Pallet Town sequence
176
+ - **`test_pallet_town_rewards.py`**: Reward function test/demo script
177
+ - **`create_red_init_state.py`** (repo root): Script to generate init state
178
+ - **`Pokemon - Red Version (USA, Europe) (SGB Enhanced).gb`**: Your ROM (not committed)
179
+
180
+ ## Creating Init States
181
+
182
+ The default init state starts in Red's bedroom with intro skipped. To create custom states:
183
+
184
+ ```python
185
+ # See create_red_init_state.py in the synth-ai repo root
186
+ from pyboy import PyBoy
187
+
188
+ emulator = PyBoy("path/to/rom.gb", window="null")
189
+
190
+ # Navigate to desired starting point
191
+ # ... (button presses)
192
+
193
+ # Save state
194
+ with open("custom_init.state", "wb") as f:
195
+     emulator.save_state(f)
196
+ ```
197
+
198
+ ## Memory Addresses
199
+
200
+ Key RAM addresses are defined in `synth_ai/environments/examples/red/engine_helpers/memory_map.py`:
201
+
202
+ - `MAP_ID = 0xD35E`
203
+ - `PLAYER_X/Y = 0xD362/0xD361`
204
+ - `IN_BATTLE_FLAG = 0xD057`
205
+ - `ENEMY_HP_CURRENT = 0xCFE6`
206
+ - `PARTY_COUNT = 0xD163`
207
+ - `BADGE_FLAGS = 0xD356`
208
+ - (and many more)
209
+
210
+ ## Troubleshooting
211
+
212
+ ### ROM Not Found
213
+
214
+ ```bash
215
+ # Set environment variable
216
+ export POKEMON_RED_ROM_PATH="/path/to/pokemon_red.gb"
217
+
218
+ # Or copy ROM to expected location
219
+ cp "Pokemon - Red Version.gb" synth_ai/environments/examples/red/roms/pokemon_red.gb
220
+ ```
221
+
222
+ ### PyBoy Not Installed
223
+
224
+ ```bash
225
+ uv add pyboy
226
+ ```
227
+
228
+ ### Server Won't Start (Port in Use)
229
+
230
+ ```bash
231
+ # Kill existing server
232
+ lsof -ti :8913 | xargs -r kill -9
233
+
234
+ # Or use a different port
235
+ uv run -m synth_ai task-app serve pokemon_red --port 8914
236
+ ```
237
+
238
+ ## Examples
239
+
240
+ ### 1. Policy Evaluation with GPT-5-nano
241
+
242
+ Evaluate a GPT-5-nano policy across 10 episodes (10 policy calls each):
243
+
244
+ ```bash
245
+ # From synth-ai root
246
+ cd /path/to/synth-ai
247
+
248
+ # 1. Make sure OpenAI API key is in .env
249
+ echo "OPENAI_API_KEY=sk-..." >> .env
250
+
251
+ # 2. Start the task app server (in background)
252
+ nohup sh -c 'printf "n\n" | uv run -m synth_ai task-app serve pokemon_red --port 8913 --no-reload' > nohup_pokemon.log 2>&1 &
253
+
254
+ # Wait for startup
255
+ sleep 8
256
+
257
+ # 3. Run the evaluation
258
+ uv run python examples/task_apps/pokemon_red/eval_pokemon_red_policy.py
259
+ ```
260
+
261
+ **Expected Output:**
262
+ ```
263
+ ================================================================================
264
+ POKÉMON RED - POLICY EVALUATION
265
+ ================================================================================
266
+
267
+ Task: Pallet Town Progression
268
+ Policy: gpt-5-nano
269
+ Episodes: 10
270
+ Max steps per episode: 10
271
+
272
+ ✓ Server is healthy
273
+ ✓ API key loaded
274
+
275
+ 🎮 Running 10 episodes in parallel...
276
+
277
+ ================================================================================
278
+ RESULTS SUMMARY
279
+ ================================================================================
280
+
281
+ +-----------+----------+---------+-------------+---------+----------+--------------+
282
+ | Episode | Reward | Steps | Final Map | Party | Badges | Milestones |
283
+ +===========+==========+=========+=============+=========+==========+==============+
284
+ | 1 | 0 | 10 | Map38 | 0 | 0 | 0 |
285
+ | 2 | 0 | 9 | Map38 | 0 | 0 | 0 |
286
+ | 9 | 20 | 10 | Map38 | 0 | 0 | 1 |
287
+ +-----------+----------+---------+-------------+---------+----------+--------------+
288
+
289
+ Statistics:
290
+ Mean reward: 2.00
291
+ Max reward: 20.00
292
+ Success rate: 10% reached first milestone
293
+
294
+ Best Episode (#9):
295
+ Total reward: 20.0
296
+ Milestones achieved:
297
+ Step 5: Moved from Map38 to Map37 (+20.0)
298
+ ```
299
+
300
+ **Key Features:**
301
+ - ✅ **Action Batching**: Each policy call returns 5-10 actions via `execute_sequence` tool
302
+ - ✅ **Parallel Execution**: All 10 episodes run concurrently
303
+ - ✅ **Rich Metrics**: Rewards, steps, maps, party status, milestones tracked
304
+ - ✅ **Fast Evaluation**: ~2-3 minutes for 10 episodes (vs 50+ min without batching)
305
+
306
+ **Customize the Evaluation:**
307
+
308
+ ```python
309
+ # In eval_pokemon_red_policy.py
310
+ NUM_EPISODES = 10 # Number of episodes to run
311
+ MAX_STEPS_PER_EPISODE = 10 # Policy calls per episode (each returns 5-10 actions)
312
+ MODEL = "gpt-5-nano" # Or "gpt-4-turbo", "qwen-2.5-7b", etc.
313
+ ```
314
+
315
+ ### 2. Test Script (Random Actions)
316
+
317
+ ```bash
318
+ cd /path/to/synth-ai  # your local synth-ai checkout
319
+ uv run python test_pokemon_red_rollout.py
320
+ ```
321
+
322
+ ### 3. Reward Function Demo
323
+
324
+ ```bash
325
+ uv run python examples/task_apps/pokemon_red/test_pallet_town_rewards.py
326
+ ```
327
+
328
+ Output:
329
+ ```
330
+ ======================================================================
331
+ PALLET TOWN PROGRESSION - REWARD SIMULATION
332
+ ======================================================================
333
+
334
+ ✓ Leave bedroom (Map 1→2): +20 points
335
+ ✓ Exit house to Pallet Town (Map 2→0): +30 points
336
+ ✓ Find and enter Oak's Lab (Map 0→3): +40 points
337
+ ...
338
+ ======================================================================
339
+ TOTAL REWARD: 705 points
340
+ ======================================================================
341
+ ```
342
+
343
+ ## Future Work
344
+
345
+ - [ ] Route 1 exploration rewards
346
+ - [ ] Wild Pokémon encounter rewards
347
+ - [ ] Capture mechanics rewards
348
+ - [ ] Gym battle rewards
349
+ - [ ] Badge collection rewards
350
+ - [ ] Multi-environment curriculum (Pallet → Viridian → Pewter)
351
+
352
+ ## Credits
353
+
354
+ - **PyBoy**: Game Boy emulator - https://github.com/Baekalfen/PyBoy
355
+ - **Pokémon Red Disassembly**: RAM map reference - https://github.com/pret/pokered
356
+ - **Datacrystal.org**: Memory address documentation
357
+