synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (291) hide show
  1. examples/multi_step/configs/README_verilog_rl.md +77 -0
  2. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  3. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  4. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  5. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  6. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +17 -5
  7. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  8. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  9. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  10. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  11. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  12. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  13. examples/multi_step/readme.md +48 -0
  14. examples/multi_step/verilog_rl_lora.md +218 -0
  15. examples/qwen_coder/configs/coder_lora_30b.toml +1 -1
  16. examples/sft/evaluate.py +2 -0
  17. examples/sft/generate_traces.py +2 -0
  18. examples/swe/task_app/grpo_swe_mini.py +56 -26
  19. examples/swe/task_app/hosted/rollout.py +42 -0
  20. examples/swe/task_app/hosted/test_service.py +5 -6
  21. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  22. examples/task_apps/TESTING.md +275 -0
  23. examples/task_apps/__init__.py +0 -0
  24. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  25. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  26. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  27. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  28. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  29. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  30. examples/task_apps/crafter/__init__.py +0 -0
  31. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  32. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  33. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  34. examples/task_apps/crafter/task_app/__init__.py +5 -0
  35. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +324 -21
  36. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
  37. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  38. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +76 -7
  39. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  40. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +25 -3
  41. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +77 -4
  42. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +117 -9
  43. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
  44. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +218 -0
  45. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  46. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  47. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  48. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  49. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  50. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  51. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  52. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  53. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  54. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  55. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  56. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  57. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  58. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  59. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  60. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  61. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  62. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  63. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  64. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  65. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  66. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  67. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  68. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  69. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  70. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  71. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  72. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  73. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  74. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  75. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  76. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  77. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  78. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  79. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  80. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  81. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  82. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  83. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  84. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  85. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  86. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  87. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  88. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  89. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  90. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  91. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  92. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  93. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  94. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  95. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  96. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  97. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  98. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  99. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  100. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  101. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  102. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  103. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  104. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  105. examples/task_apps/enron/__init__.py +1 -0
  106. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  107. examples/task_apps/enron/filter_sft.toml +5 -0
  108. examples/task_apps/enron/task_app/README.md +14 -0
  109. examples/task_apps/enron/task_app/__init__.py +1 -0
  110. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  111. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  112. examples/task_apps/enron/tests/__init__.py +4 -0
  113. examples/task_apps/enron/tests/conftest.py +115 -0
  114. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  115. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  116. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  117. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  118. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  119. examples/task_apps/math/__init__.py +0 -0
  120. examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
  121. examples/task_apps/pokemon_battle/__init__.py +2 -0
  122. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  123. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  124. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  125. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  126. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  127. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  128. examples/task_apps/pokemon_red/README.md +357 -0
  129. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  130. examples/task_apps/pokemon_red/__init__.py +3 -0
  131. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  132. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
  133. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  134. examples/task_apps/pokemon_red/task_app.py +799 -0
  135. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  136. examples/task_apps/sokoban/README.md +307 -0
  137. examples/task_apps/sokoban/__init__.py +3 -0
  138. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  139. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  140. examples/task_apps/sokoban/filter_sft.toml +5 -0
  141. examples/task_apps/sokoban/task_app.py +1058 -0
  142. examples/task_apps/sokoban/tests/__init__.py +4 -0
  143. examples/task_apps/sokoban/tests/conftest.py +113 -0
  144. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  145. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  146. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  147. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  148. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  149. examples/task_apps/verilog/__init__.py +1 -0
  150. examples/task_apps/verilog/eval_groq_qwen32b.toml +24 -0
  151. examples/task_apps/verilog/filter_sft.toml +5 -0
  152. examples/task_apps/verilog/task_app/README.md +12 -0
  153. examples/task_apps/verilog/task_app/__init__.py +1 -0
  154. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  155. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  156. examples/task_apps/verilog/tests/__init__.py +4 -0
  157. examples/task_apps/verilog/tests/conftest.py +115 -0
  158. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  159. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  160. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  161. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  162. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  163. examples/vlm/crafter_openai_vlm_agent.py +4 -4
  164. examples/vlm/run_crafter_vlm_benchmark.py +4 -4
  165. examples/warming_up_to_rl/groq_test.py +2 -0
  166. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  167. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  168. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  169. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  170. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  171. examples/workflows/__init__.py +0 -0
  172. examples/workflows/math_rl/__init__.py +0 -0
  173. examples/workflows/math_rl/download_dataset.py +80 -0
  174. synth_ai/__init__.py +2 -2
  175. synth_ai/api/models/supported.py +1 -0
  176. synth_ai/api/train/builders.py +25 -11
  177. synth_ai/api/train/cli.py +12 -6
  178. synth_ai/api/train/configs/__init__.py +10 -10
  179. synth_ai/api/train/configs/rl.py +5 -4
  180. synth_ai/api/train/configs/sft.py +4 -3
  181. synth_ai/api/train/env_resolver.py +5 -2
  182. synth_ai/api/train/supported_algos.py +10 -5
  183. synth_ai/api/train/utils.py +7 -4
  184. synth_ai/cli/__init__.py +48 -59
  185. synth_ai/cli/_modal_wrapper.py +3 -2
  186. synth_ai/cli/_storage.py +4 -3
  187. synth_ai/cli/_validate_task_app.py +11 -0
  188. synth_ai/cli/balance.py +4 -3
  189. synth_ai/cli/calc.py +2 -2
  190. synth_ai/cli/demo.py +14 -7
  191. synth_ai/cli/legacy_root_backup.py +1 -1
  192. synth_ai/cli/recent.py +1 -1
  193. synth_ai/cli/rl_demo.py +8 -7
  194. synth_ai/cli/root.py +0 -97
  195. synth_ai/cli/status.py +1 -1
  196. synth_ai/cli/task_apps.py +1922 -190
  197. synth_ai/cli/traces.py +1 -1
  198. synth_ai/cli/tui.py +57 -0
  199. synth_ai/cli/turso.py +1 -1
  200. synth_ai/cli/watch.py +1 -1
  201. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +29 -17
  202. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  203. synth_ai/environments/examples/enron/engine.py +7 -2
  204. synth_ai/environments/examples/enron/environment.py +68 -0
  205. synth_ai/environments/examples/red/engine.py +27 -0
  206. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  207. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  208. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  209. synth_ai/environments/examples/red/environment.py +60 -0
  210. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  211. synth_ai/environments/examples/verilog/engine.py +104 -12
  212. synth_ai/evals/client.py +58 -61
  213. synth_ai/jobs/client.py +16 -4
  214. synth_ai/judge_schemas.py +9 -9
  215. synth_ai/py.typed +0 -0
  216. synth_ai/task/__init__.py +24 -5
  217. synth_ai/task/apps/__init__.py +1 -0
  218. synth_ai/task/config.py +257 -0
  219. synth_ai/task/contracts.py +138 -39
  220. synth_ai/task/proxy.py +48 -56
  221. synth_ai/task/rubrics/__init__.py +56 -0
  222. synth_ai/task/rubrics/loaders.py +152 -0
  223. synth_ai/task/rubrics/models.py +57 -0
  224. synth_ai/task/rubrics/scoring.py +116 -0
  225. synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
  226. synth_ai/task/server.py +8 -7
  227. synth_ai/task/trace_correlation_helpers.py +315 -0
  228. synth_ai/task/validators.py +413 -6
  229. synth_ai/tracing_v3/abstractions.py +3 -3
  230. synth_ai/tracing_v3/decorators.py +7 -3
  231. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  232. synth_ai/tracing_v3/replica_sync.py +4 -4
  233. synth_ai/tracing_v3/serialization.py +5 -5
  234. synth_ai/tracing_v3/session_tracer.py +16 -6
  235. synth_ai/tracing_v3/storage/base.py +29 -29
  236. synth_ai/tracing_v3/storage/config.py +3 -3
  237. synth_ai/tracing_v3/trace_utils.py +317 -0
  238. synth_ai/tracing_v3/turso/daemon.py +8 -7
  239. synth_ai/tracing_v3/turso/native_manager.py +66 -43
  240. synth_ai/tracing_v3/utils.py +3 -3
  241. synth_ai/tui/__init__.py +5 -0
  242. synth_ai/tui/__main__.py +13 -0
  243. synth_ai/tui/cli/__init__.py +1 -0
  244. synth_ai/tui/cli/query_experiments.py +164 -0
  245. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  246. synth_ai/tui/dashboard.py +906 -0
  247. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/METADATA +4 -1
  248. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/RECORD +278 -126
  249. examples/agora_ex/README_MoE.md +0 -224
  250. examples/agora_ex/__init__.py +0 -7
  251. examples/agora_ex/agora_ex.py +0 -65
  252. examples/agora_ex/agora_ex_task_app.py +0 -590
  253. examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
  254. examples/agora_ex/reward_fn_grpo-human.py +0 -129
  255. examples/agora_ex/system_prompt_CURRENT.md +0 -63
  256. examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
  257. examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
  258. examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
  259. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +0 -62
  260. synth_ai/rubrics/__init__.py +0 -22
  261. synth_ai/task/rubrics.py +0 -219
  262. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
  263. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
  264. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
  265. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
  266. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
  267. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
  268. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
  269. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
  270. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
  271. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
  272. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
  273. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
  274. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
  275. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
  276. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
  277. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
  278. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
  279. /examples/{rl/task_app → task_apps/math}/README.md +0 -0
  280. /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
  281. /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
  282. /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
  283. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
  284. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
  285. /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
  286. /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
  287. /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
  288. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/WHEEL +0 -0
  289. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/entry_points.txt +0 -0
  290. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/licenses/LICENSE +0 -0
  291. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,300 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Test agent and emulator running directly without separate server process
4
+ """
5
+
6
+ import pytest
7
+ import os
8
+ import time
9
+ from pathlib import Path
10
+ from pokemon_env.emulator import EmeraldEmulator
11
+
12
+
13
+ class TestDirectAgentEmulator:
14
+ """Test agent functionality by running emulator directly"""
15
+
16
@pytest.fixture(scope="class")
def output_dir(self):
    """Return the directory that receives map dumps, creating it if needed.

    The path is relative to the current working directory.
    """
    maps_dir = Path("test_outputs/direct_agent_maps")
    # parents/exist_ok make the call safe on both first and repeated runs.
    maps_dir.mkdir(parents=True, exist_ok=True)
    return maps_dir
22
+
23
def format_map_for_comparison(self, tiles, title, location, position):
    """Render a tile grid as a text traversability map.

    Args:
        tiles: Sequence of rows. Each tile is a sequence whose first four
            items are read as (tile_id, behavior, collision, elevation);
            ``behavior`` may be a plain value or an object exposing
            ``.value``. Tiles with fewer than four items render as ``?``.
        title: Text placed inside the ``=== ... ===`` banner.
        location: Accepted for call-site symmetry; not used in the output.
        position: When it holds at least two items, the cell at row 7,
            column 7 (the center of a 15x15 map) is drawn as ``P``.

    Returns:
        The formatted map as one newline-joined string, or a short
        "No tiles available" message when ``tiles`` is empty.
    """
    if not tiles:
        return f"=== {title} ===\nNo tiles available\n"

    # Behaviors that always map to one symbol, whatever the collision value.
    fixed_symbols = {
        1: "#",     # SECRET_BASE_WALL
        51: "IM",   # IMPASSABLE_SOUTH
        96: "D",    # NON_ANIMATED_DOOR
        101: "SO",  # SOUTH_ARROW_WARP
        105: "D",   # ANIMATED_DOOR
        134: "TE",  # TELEVISION
    }

    column_count = len(tiles[0])
    mark_player = bool(position) and len(position) >= 2

    # Header with column numbers, followed by a separator rule.
    header = " " + " ".join(f"{i:2}" for i in range(column_count))
    output = [
        f"=== {title} ===",
        "Format: (MetatileID, Behavior, X, Y)",
        f"Map dimensions: {len(tiles)}x{column_count}",
        "",
        "--- TRAVERSABILITY MAP ---",
        header,
        " " + "-" * (len(header) - 4),
    ]

    for row_idx, row in enumerate(tiles):
        symbols = []
        for col_idx, tile in enumerate(row):
            if len(tile) < 4:
                symbols.append("?")
                continue

            # Slice before unpacking so tiles that carry extra trailing
            # fields do not raise ValueError.
            _tile_id, behavior, collision, _elevation = tile[:4]
            behavior_val = behavior.value if hasattr(behavior, 'value') else behavior

            if behavior_val == 0:  # NORMAL: walkable unless collision is set
                symbol = "." if collision == 0 else "#"
            else:
                # Any behavior not listed is treated as walkable.
                symbol = fixed_symbols.get(behavior_val, ".")

            # Player is at center of 15x15 map (position 7,7)
            if mark_player and row_idx == 7 and col_idx == 7:
                symbol = "P"

            symbols.append(symbol)

        # Row label followed by the space-separated symbols.
        output.append(f"{row_idx:2}: " + " ".join(f"{symbol:1}" for symbol in symbols))

    return "\n".join(output)
81
+
82
def save_map_output(self, tiles, output_file, title, location, position):
    """Format a tile grid and write it to ``output_file``.

    Args:
        tiles: Tile grid passed through to ``format_map_for_comparison``.
        output_file: Destination path (``str`` or path-like). Missing
            parent directories are created.
        title: Banner title passed through to the formatter.
        location: Passed through to the formatter.
        position: Passed through to the formatter.

    Returns:
        The formatted text that was written.
    """
    formatted_output = self.format_map_for_comparison(tiles, title, location, position)

    destination = Path(output_file)
    # For a bare filename the parent is ".", so mkdir is a harmless no-op;
    # os.makedirs("") would raise FileNotFoundError in that case.
    destination.parent.mkdir(parents=True, exist_ok=True)

    # Explicit UTF-8 keeps the output independent of the platform default.
    with open(destination, 'w', encoding='utf-8') as f:
        f.write(formatted_output)

    return formatted_output
91
+
92
def test_direct_emulator_house_to_outside(self, output_dir):
    """Walk the player out of the house and dump the before/after maps.

    The test is skipped when the ROM file is absent. Map dumps for the
    starting and final locations are written into ``output_dir``.
    """
    print("🏠➡️🌳 DIRECT EMULATOR: House to Outside Movement Test")

    # Initialize emulator directly
    rom_path = "Emerald-GBAdvance/rom.gba"
    if not os.path.exists(rom_path):
        pytest.skip(f"ROM not found at {rom_path}")

    emulator = EmeraldEmulator(rom_path=rom_path, headless=True, sound=False)
    emulator.initialize()

    try:
        # Load the state inside the try so the emulator is stopped even
        # when the state file cannot be loaded.
        emulator.load_state('tests/states/house.state')

        # Get initial house map
        print("\n1️⃣ Getting initial house map...")
        house_state = emulator.get_comprehensive_state()
        house_player = house_state['player']
        house_location = house_player['location']
        house_position = (house_player['position']['x'], house_player['position']['y'])
        house_tiles = house_state['map']['tiles']

        print(f"House state: {house_location} at {house_position}")

        # Save house map
        house_output_file = output_dir / "direct_emulator_house.txt"
        self.save_map_output(
            house_tiles, house_output_file,
            f"Direct Emulator House - {house_location}", house_location, house_position
        )

        # Analyze house map
        house_corruption = self.analyze_map_corruption(house_tiles)
        print(f"House map: {house_corruption['total']} tiles, {house_corruption['im_count']} IM tiles")

        # Move to outside area
        print("\n2️⃣ Moving to outside area...")
        moves_made = 0
        max_moves = 8

        for move_num in range(max_moves):
            print(f"Move {move_num + 1}: Pressing DOWN")
            emulator.press_buttons(['down'], hold_frames=25, release_frames=25)
            time.sleep(0.1)  # Small delay for transition
            moves_made += 1

            # Check if we've left the house
            current_state = emulator.get_comprehensive_state()
            current_player = current_state['player']
            current_location = current_player['location']
            current_position = (current_player['position']['x'], current_player['position']['y'])

            print(f" Position: {current_position}, Location: {current_location}")

            if "HOUSE" not in current_location:
                print(f"✅ Reached outside area after {moves_made} moves!")
                break
        else:
            # Runs only when the loop used every move without breaking.
            print(f"❌ Still in house after {max_moves} moves")

        # Wait for any transition effects to complete
        time.sleep(0.5)

        # Get final outside map
        print("\n3️⃣ Getting final outside map...")
        outside_state = emulator.get_comprehensive_state()
        outside_player = outside_state['player']
        outside_location = outside_player['location']
        outside_position = (outside_player['position']['x'], outside_player['position']['y'])
        outside_tiles = outside_state['map']['tiles']

        print(f"Outside state: {outside_location} at {outside_position}")

        # Save outside map
        outside_output_file = output_dir / "direct_emulator_outside.txt"
        self.save_map_output(
            outside_tiles, outside_output_file,
            f"Direct Emulator Outside - {outside_location}", outside_location, outside_position
        )

        # Analyze outside map
        outside_corruption = self.analyze_map_corruption(outside_tiles)
        print(f"Outside map: {outside_corruption['total']} tiles, {outside_corruption['im_count']} IM tiles")

        print("\n4️⃣ Map Analysis:")
        print(f"House map saved to: {house_output_file}")
        print(f"Outside map saved to: {outside_output_file}")

        # Test assertions
        assert house_tiles is not None, "House map should exist"
        assert outside_tiles is not None, "Outside map should exist"
        assert len(house_tiles) == 15, "House map should be 15x15"
        assert len(outside_tiles) == 15, "Outside map should be 15x15"

        # Check that we actually moved to outside area
        assert "HOUSE" not in outside_location, f"Should be outside, but location is: {outside_location}"

        # Check for reasonable corruption levels (reported, not asserted)
        if outside_corruption['im_count'] > 50:
            print(f"⚠️ WARNING: High corruption in outside map ({outside_corruption['im_count']} IM tiles)")
            print("This indicates area transition detection may not be working properly")
        else:
            print(f"✅ Outside map looks clean ({outside_corruption['im_count']} IM tiles is acceptable)")

        print("✅ DIRECT EMULATOR TEST PASSED!")

    finally:
        emulator.stop()
197
+
198
def analyze_map_corruption(self, tiles):
    """Summarize how many tiles carry behavior 51 (IMPASSABLE_SOUTH).

    Args:
        tiles: Sequence of rows; each tile is a sequence whose second item
            is the behavior, either a plain value or an object exposing
            ``.value``. Tiles with fewer than two items count toward the
            total but are otherwise ignored.

    Returns:
        A dict with the keys ``total`` (tile count), ``im_count`` (tiles
        with behavior 51), ``corruption_ratio`` (``im_count / total``, or
        0.0 when there are no tiles) and ``behavior_distribution``
        (behavior value -> occurrence count). All four keys are present
        for empty input as well.
    """
    if not tiles:
        # Same shape as the populated result so callers can index any key.
        return {
            'total': 0,
            'im_count': 0,
            'corruption_ratio': 0.0,
            'behavior_distribution': {},
        }

    total_tiles = sum(len(row) for row in tiles)
    behavior_distribution = {}

    for row in tiles:
        for tile in row:
            if len(tile) < 2:
                continue
            behavior = tile[1].value if hasattr(tile[1], 'value') else tile[1]
            behavior_distribution[behavior] = behavior_distribution.get(behavior, 0) + 1

    # 51 is IMPASSABLE_SOUTH; its tally is already in the distribution.
    im_count = behavior_distribution.get(51, 0)
    corruption_ratio = im_count / total_tiles if total_tiles > 0 else 0.0

    return {
        'total': total_tiles,
        'im_count': im_count,
        'corruption_ratio': corruption_ratio,
        'behavior_distribution': behavior_distribution,
    }
224
+
225
def test_direct_agent_simulation(self, output_dir):
    """Drive the emulator with a trivial "move down while in house" policy.

    The test is skipped when the ROM file is absent. The final map is
    written into ``output_dir`` and the test passes when the final
    location no longer contains "HOUSE".
    """
    print("🤖 DIRECT AGENT SIMULATION: Testing agent-like behavior")

    rom_path = "Emerald-GBAdvance/rom.gba"
    if not os.path.exists(rom_path):
        pytest.skip(f"ROM not found at {rom_path}")

    emulator = EmeraldEmulator(rom_path=rom_path, headless=True, sound=False)
    emulator.initialize()

    try:
        # Load the state inside the try so the emulator is stopped even
        # when the state file cannot be loaded.
        emulator.load_state('tests/states/house.state')

        print("\n🎯 Goal: Navigate from house to outside using agent-like logic")

        steps = 0
        max_steps = 10

        while steps < max_steps:
            # Get current state (like agent perception)
            current_state = emulator.get_comprehensive_state()
            player = current_state['player']
            location = player['location']
            position = (player['position']['x'], player['position']['y'])

            print(f"\nStep {steps + 1}: {location} at {position}")

            # Simple agent logic: if in house, move down
            if "HOUSE" not in location:
                print(" 🎉 Agent goal achieved: Reached outside area!")
                break

            print(" 🤖 Agent decision: In house, moving DOWN")
            emulator.press_buttons(['down'], hold_frames=25, release_frames=25)
            time.sleep(0.1)
            steps += 1

        # Get final state for analysis
        final_state = emulator.get_comprehensive_state()
        final_player = final_state['player']
        final_location = final_player['location']
        final_position = (final_player['position']['x'], final_player['position']['y'])
        final_tiles = final_state['map']['tiles']

        print("\n📊 Final Result:")
        print(f"Location: {final_location}")
        print(f"Position: {final_position}")
        print(f"Steps taken: {steps}")

        # Save agent simulation result
        agent_output_file = output_dir / "direct_agent_simulation.txt"
        self.save_map_output(
            final_tiles, agent_output_file,
            f"Direct Agent Simulation - {final_location}", final_location, final_position
        )

        corruption = self.analyze_map_corruption(final_tiles)
        print(f"Map quality: {corruption['total']} tiles, {corruption['im_count']} IM tiles")
        print(f"Agent simulation saved to: {agent_output_file}")

        # Test passed if agent successfully navigated
        success = "HOUSE" not in final_location
        if success:
            print("✅ AGENT SIMULATION SUCCESSFUL!")
        else:
            print("❌ Agent failed to navigate out of house")

        # Assert on the final location rather than the step counter: when
        # the exit happens on the last allowed move the loop ends before
        # re-checking, so ``steps < max_steps`` would fail despite success.
        assert success, (
            "Agent should complete navigation within step limit "
            f"(still at {final_location} after {steps} steps)"
        )

    finally:
        emulator.stop()
295
+
296
+
297
if __name__ == "__main__":
    # Script entry point for manual runs: verbose output, no stdout capture.
    import sys

    exit_code = pytest.main([__file__, "-v", "-s"])
    sys.exit(exit_code)
@@ -0,0 +1,205 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Pytest version of FPS adjustment system test for Pokemon Emerald emulator
4
+
5
+ This test verifies that the FPS adjustment system correctly:
6
+ 1. Runs at 30 FPS in normal overworld state
7
+ 2. Speeds up to 120 FPS (4x) when dialog is detected
8
+ 3. Reverts to 30 FPS when dialog ends
9
+
10
+ Test States:
11
+ - Base overworld: tests/states/simple_test.state (expected: 30 FPS)
12
+ - Dialog state: tests/states/dialog.state (expected: 120 FPS)
13
+ - Dialog state 2: tests/states/dialog2.state (expected: 120 FPS)
14
+ - After dialog: tests/states/after_dialog.state (expected: 30 FPS)
15
+ """
16
+
17
+ import pytest
18
+ import subprocess
19
+ import time
20
+ import requests
21
+ import os
22
+ import sys
23
+
24
# Parametrization table for the FPS tests. Each entry names the saved state
# handed to the server, the FPS value the /status endpoint is expected to
# report for it, and a human-readable label used in the test output.
TEST_CASES = [
    dict(
        state_file="tests/states/simple_test.state",
        expected_fps=30,
        test_name="Base Overworld State",
    ),
    dict(
        state_file="tests/states/dialog.state",
        expected_fps=120,
        test_name="Dialog State",
    ),
    dict(
        state_file="tests/states/dialog2.state",
        expected_fps=120,
        test_name="Dialog State 2",
    ),
    dict(
        state_file="tests/states/after_dialog.state",
        expected_fps=30,
        test_name="After Dialog Ends",
    ),
]
47
+
48
class ServerManager:
    """Start and stop the server subprocess used by the FPS tests.

    The server is launched as ``-m server.app --manual --load-state <file>``
    and is probed over HTTP at ``http://localhost:8000``.
    """

    def __init__(self):
        # Popen handle of the running server, or None when nothing is running.
        self.server_process = None

    def start_server(self, state_file):
        """Launch the server with ``state_file`` and report whether it came up.

        Args:
            state_file: Path passed to the server's ``--load-state`` option.

        Returns:
            True if ``/status`` answered with HTTP 200 after the startup wait,
            False otherwise. On failure any process that was spawned is shut
            down again so it cannot keep holding the port for later tests.
        """
        print(f"🚀 Starting server with state: {state_file}")
        # Prefer the interpreter running the tests so the child sees the same
        # environment; fall back to "python" if it is not known.
        interpreter = sys.executable or "python"
        cmd = [interpreter, "-m", "server.app", "--manual", "--load-state", state_file]

        try:
            # Nothing ever reads the child's output, so discard it: an
            # undrained PIPE can fill up and block the server.
            self.server_process = subprocess.Popen(
                cmd,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )

            # Wait for server to start
            print("⏳ Waiting for server to start...")
            time.sleep(5)

            # Test if server is responding
            response = requests.get("http://localhost:8000/status", timeout=5)
        except Exception as e:
            print(f"❌ Failed to start server: {e}")
            self.stop_server()
            return False

        if response.status_code == 200:
            print("✅ Server started successfully")
            return True

        print(f"❌ Server not responding: {response.status_code}")
        self.stop_server()
        return False

    def stop_server(self):
        """Stop the server, escalating from HTTP request to terminate to kill.

        Does nothing when no server process is recorded, so it is safe to
        call more than once.
        """
        if not self.server_process:
            return

        print("🛑 Stopping server...")
        try:
            # Try graceful shutdown first
            requests.post("http://localhost:8000/stop", timeout=2)
            time.sleep(1)
        except requests.RequestException:
            # Best effort only: the server may already be down or may not
            # answer; the terminate/kill sequence below still runs.
            pass

        # Force terminate if still running
        try:
            self.server_process.terminate()
            self.server_process.wait(timeout=5)
            print("✅ Server stopped gracefully")
        except subprocess.TimeoutExpired:
            print("⚠️ Server didn't stop gracefully, force killing...")
            self.server_process.kill()
            self.server_process.wait()
            print("✅ Server force killed")

        # Forget the handle so a repeated stop_server() is a no-op.
        self.server_process = None
104
+
105
def check_fps(expected_fps, test_name):
    """Compare the FPS reported by the server's /status endpoint to a target.

    Args:
        expected_fps: Value the status payload's ``current_fps`` must equal.
        test_name: Label used in the printed PASS/FAIL line.

    Returns:
        True when ``current_fps`` equals ``expected_fps``. False on a
        mismatch, on a non-200 response, or when any exception is raised
        while querying or decoding the status.
    """
    try:
        reply = requests.get("http://localhost:8000/status", timeout=5)
        if reply.status_code != 200:
            print(f"❌ Server not responding: {reply.status_code}")
            return False

        payload = reply.json()
        base = payload.get('base_fps')
        current = payload.get('current_fps')
        dialog_flag = payload.get('is_dialog')
        multiplier = payload.get('fps_multiplier')

        # Echo everything the server reported to make failures diagnosable.
        print(f" Base FPS: {base}")
        print(f" Current FPS: {current}")
        print(f" Is Dialog: {dialog_flag}")
        print(f" FPS Multiplier: {multiplier}")

        matches = current == expected_fps
        if not matches:
            print(f"❌ {test_name}: {current} FPS (expected: {expected_fps}) - FAIL")
            return False

        print(f"✅ {test_name}: {current} FPS (expected: {expected_fps}) - PASS")
        return True

    except Exception as err:
        print(f"❌ Error checking FPS: {err}")
        return False
134
+
135
@pytest.fixture(scope="session", autouse=True)
def check_environment():
    """Fail the session early when the working directory or states are wrong.

    Runs once per session for every test (autouse). Fails if
    ``server/app.py`` is not reachable from the current directory, or if any
    of the saved state files the tests load is missing.
    """
    # The relative paths below only resolve from the project root.
    in_project_root = os.path.exists("server/app.py")
    if not in_project_root:
        pytest.fail("❌ Error: This test must be run from the project root directory")

    # Saved states that the parametrized tests load.
    required_files = (
        "tests/states/simple_test.state",
        "tests/states/dialog.state",
        "tests/states/dialog2.state",
        "tests/states/after_dialog.state",
    )

    missing_files = []
    for candidate in required_files:
        if not os.path.exists(candidate):
            missing_files.append(candidate)

    if missing_files:
        pytest.fail(f"❌ Error: Missing required state files: {missing_files}")
153
+
154
@pytest.mark.parametrize("test_case", TEST_CASES)
def test_fps_adjustment(test_case):
    """Start the server on one saved state and check the FPS it reports.

    Args:
        test_case: Mapping with ``state_file`` (state to load),
            ``expected_fps`` (value ``check_fps`` must see) and
            ``test_name`` (label for the output).

    The server is started inside the ``try`` block so that ``stop_server``
    always runs, including when startup fails; otherwise a half-started
    process would be left behind for the following test cases.
    """
    state_file = test_case["state_file"]
    expected_fps = test_case["expected_fps"]
    test_name = test_case["test_name"]

    print(f"\n🎮 Testing {test_name}")
    print("=" * 50)
    print(f"State file: {state_file}")
    print(f"Expected FPS: {expected_fps}")

    # Check if state file exists
    if not os.path.exists(state_file):
        pytest.fail(f"❌ State file not found: {state_file}")

    server = ServerManager()
    try:
        # Start server
        if not server.start_server(state_file):
            pytest.fail("Failed to start server")

        # For after_dialog state, wait for the 5-second timeout to expire
        if "after_dialog" in state_file:
            print("⏳ Waiting 6 seconds for dialog FPS timeout to expire...")
            time.sleep(6)

        # Check FPS
        result = check_fps(expected_fps, test_name)

        # Assert the result
        assert result, f"FPS check failed for {test_name}"

    finally:
        # Stop server
        server.stop_server()

        # Wait between tests
        time.sleep(2)
193
+
194
def test_fps_adjustment_summary():
    """Print a fixed, human-readable recap of what the FPS tests cover.

    Makes no assertions; it only writes the summary lines to stdout.
    """
    summary_lines = (
        "\n📋 FPS Adjustment System Test Summary",
        "=" * 50,
        "This test verifies the FPS adjustment system:",
        "1. Base overworld state: 30 FPS",
        "2. Dialog state: 120 FPS (4x speedup)",
        "3. Dialog state 2: 120 FPS (4x speedup) - Currently failing, needs investigation",
        "4. After dialog ends: 30 FPS (reverted)",
        "",
        "🎉 All individual FPS tests completed!",
        "Note: Dialog State 2 is expected to fail until the dialog detection is improved",
    )
    for text in summary_lines:
        print(text)