synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (291) hide show
  1. examples/multi_step/configs/README_verilog_rl.md +77 -0
  2. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  3. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  4. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  5. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  6. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +17 -5
  7. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  8. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  9. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  10. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  11. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  12. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  13. examples/multi_step/readme.md +48 -0
  14. examples/multi_step/verilog_rl_lora.md +218 -0
  15. examples/qwen_coder/configs/coder_lora_30b.toml +1 -1
  16. examples/sft/evaluate.py +2 -0
  17. examples/sft/generate_traces.py +2 -0
  18. examples/swe/task_app/grpo_swe_mini.py +56 -26
  19. examples/swe/task_app/hosted/rollout.py +42 -0
  20. examples/swe/task_app/hosted/test_service.py +5 -6
  21. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  22. examples/task_apps/TESTING.md +275 -0
  23. examples/task_apps/__init__.py +0 -0
  24. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  25. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  26. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  27. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  28. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  29. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  30. examples/task_apps/crafter/__init__.py +0 -0
  31. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  32. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  33. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  34. examples/task_apps/crafter/task_app/__init__.py +5 -0
  35. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +324 -21
  36. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
  37. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  38. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +76 -7
  39. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  40. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +25 -3
  41. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +77 -4
  42. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +117 -9
  43. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
  44. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +218 -0
  45. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  46. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  47. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  48. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  49. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  50. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  51. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  52. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  53. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  54. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  55. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  56. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  57. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  58. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  59. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  60. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  61. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  62. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  63. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  64. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  65. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  66. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  67. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  68. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  69. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  70. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  71. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  72. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  73. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  74. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  75. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  76. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  77. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  78. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  79. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  80. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  81. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  82. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  83. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  84. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  85. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  86. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  87. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  88. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  89. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  90. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  91. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  92. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  93. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  94. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  95. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  96. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  97. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  98. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  99. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  100. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  101. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  102. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  103. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  104. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  105. examples/task_apps/enron/__init__.py +1 -0
  106. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  107. examples/task_apps/enron/filter_sft.toml +5 -0
  108. examples/task_apps/enron/task_app/README.md +14 -0
  109. examples/task_apps/enron/task_app/__init__.py +1 -0
  110. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  111. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  112. examples/task_apps/enron/tests/__init__.py +4 -0
  113. examples/task_apps/enron/tests/conftest.py +115 -0
  114. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  115. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  116. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  117. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  118. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  119. examples/task_apps/math/__init__.py +0 -0
  120. examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
  121. examples/task_apps/pokemon_battle/__init__.py +2 -0
  122. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  123. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  124. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  125. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  126. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  127. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  128. examples/task_apps/pokemon_red/README.md +357 -0
  129. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  130. examples/task_apps/pokemon_red/__init__.py +3 -0
  131. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  132. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
  133. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  134. examples/task_apps/pokemon_red/task_app.py +799 -0
  135. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  136. examples/task_apps/sokoban/README.md +307 -0
  137. examples/task_apps/sokoban/__init__.py +3 -0
  138. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  139. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  140. examples/task_apps/sokoban/filter_sft.toml +5 -0
  141. examples/task_apps/sokoban/task_app.py +1058 -0
  142. examples/task_apps/sokoban/tests/__init__.py +4 -0
  143. examples/task_apps/sokoban/tests/conftest.py +113 -0
  144. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  145. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  146. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  147. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  148. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  149. examples/task_apps/verilog/__init__.py +1 -0
  150. examples/task_apps/verilog/eval_groq_qwen32b.toml +24 -0
  151. examples/task_apps/verilog/filter_sft.toml +5 -0
  152. examples/task_apps/verilog/task_app/README.md +12 -0
  153. examples/task_apps/verilog/task_app/__init__.py +1 -0
  154. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  155. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  156. examples/task_apps/verilog/tests/__init__.py +4 -0
  157. examples/task_apps/verilog/tests/conftest.py +115 -0
  158. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  159. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  160. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  161. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  162. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  163. examples/vlm/crafter_openai_vlm_agent.py +4 -4
  164. examples/vlm/run_crafter_vlm_benchmark.py +4 -4
  165. examples/warming_up_to_rl/groq_test.py +2 -0
  166. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  167. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  168. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  169. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  170. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  171. examples/workflows/__init__.py +0 -0
  172. examples/workflows/math_rl/__init__.py +0 -0
  173. examples/workflows/math_rl/download_dataset.py +80 -0
  174. synth_ai/__init__.py +2 -2
  175. synth_ai/api/models/supported.py +1 -0
  176. synth_ai/api/train/builders.py +25 -11
  177. synth_ai/api/train/cli.py +12 -6
  178. synth_ai/api/train/configs/__init__.py +10 -10
  179. synth_ai/api/train/configs/rl.py +5 -4
  180. synth_ai/api/train/configs/sft.py +4 -3
  181. synth_ai/api/train/env_resolver.py +5 -2
  182. synth_ai/api/train/supported_algos.py +10 -5
  183. synth_ai/api/train/utils.py +7 -4
  184. synth_ai/cli/__init__.py +48 -59
  185. synth_ai/cli/_modal_wrapper.py +3 -2
  186. synth_ai/cli/_storage.py +4 -3
  187. synth_ai/cli/_validate_task_app.py +11 -0
  188. synth_ai/cli/balance.py +4 -3
  189. synth_ai/cli/calc.py +2 -2
  190. synth_ai/cli/demo.py +14 -7
  191. synth_ai/cli/legacy_root_backup.py +1 -1
  192. synth_ai/cli/recent.py +1 -1
  193. synth_ai/cli/rl_demo.py +8 -7
  194. synth_ai/cli/root.py +0 -97
  195. synth_ai/cli/status.py +1 -1
  196. synth_ai/cli/task_apps.py +1922 -190
  197. synth_ai/cli/traces.py +1 -1
  198. synth_ai/cli/tui.py +57 -0
  199. synth_ai/cli/turso.py +1 -1
  200. synth_ai/cli/watch.py +1 -1
  201. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +29 -17
  202. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  203. synth_ai/environments/examples/enron/engine.py +7 -2
  204. synth_ai/environments/examples/enron/environment.py +68 -0
  205. synth_ai/environments/examples/red/engine.py +27 -0
  206. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  207. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  208. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  209. synth_ai/environments/examples/red/environment.py +60 -0
  210. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  211. synth_ai/environments/examples/verilog/engine.py +104 -12
  212. synth_ai/evals/client.py +58 -61
  213. synth_ai/jobs/client.py +16 -4
  214. synth_ai/judge_schemas.py +9 -9
  215. synth_ai/py.typed +0 -0
  216. synth_ai/task/__init__.py +24 -5
  217. synth_ai/task/apps/__init__.py +1 -0
  218. synth_ai/task/config.py +257 -0
  219. synth_ai/task/contracts.py +138 -39
  220. synth_ai/task/proxy.py +48 -56
  221. synth_ai/task/rubrics/__init__.py +56 -0
  222. synth_ai/task/rubrics/loaders.py +152 -0
  223. synth_ai/task/rubrics/models.py +57 -0
  224. synth_ai/task/rubrics/scoring.py +116 -0
  225. synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
  226. synth_ai/task/server.py +8 -7
  227. synth_ai/task/trace_correlation_helpers.py +315 -0
  228. synth_ai/task/validators.py +413 -6
  229. synth_ai/tracing_v3/abstractions.py +3 -3
  230. synth_ai/tracing_v3/decorators.py +7 -3
  231. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  232. synth_ai/tracing_v3/replica_sync.py +4 -4
  233. synth_ai/tracing_v3/serialization.py +5 -5
  234. synth_ai/tracing_v3/session_tracer.py +16 -6
  235. synth_ai/tracing_v3/storage/base.py +29 -29
  236. synth_ai/tracing_v3/storage/config.py +3 -3
  237. synth_ai/tracing_v3/trace_utils.py +317 -0
  238. synth_ai/tracing_v3/turso/daemon.py +8 -7
  239. synth_ai/tracing_v3/turso/native_manager.py +66 -43
  240. synth_ai/tracing_v3/utils.py +3 -3
  241. synth_ai/tui/__init__.py +5 -0
  242. synth_ai/tui/__main__.py +13 -0
  243. synth_ai/tui/cli/__init__.py +1 -0
  244. synth_ai/tui/cli/query_experiments.py +164 -0
  245. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  246. synth_ai/tui/dashboard.py +906 -0
  247. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/METADATA +4 -1
  248. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/RECORD +278 -126
  249. examples/agora_ex/README_MoE.md +0 -224
  250. examples/agora_ex/__init__.py +0 -7
  251. examples/agora_ex/agora_ex.py +0 -65
  252. examples/agora_ex/agora_ex_task_app.py +0 -590
  253. examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
  254. examples/agora_ex/reward_fn_grpo-human.py +0 -129
  255. examples/agora_ex/system_prompt_CURRENT.md +0 -63
  256. examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
  257. examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
  258. examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
  259. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +0 -62
  260. synth_ai/rubrics/__init__.py +0 -22
  261. synth_ai/task/rubrics.py +0 -219
  262. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
  263. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
  264. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
  265. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
  266. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
  267. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
  268. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
  269. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
  270. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
  271. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
  272. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
  273. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
  274. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
  275. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
  276. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
  277. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
  278. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
  279. /examples/{rl/task_app → task_apps/math}/README.md +0 -0
  280. /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
  281. /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
  282. /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
  283. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
  284. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
  285. /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
  286. /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
  287. /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
  288. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/WHEEL +0 -0
  289. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/entry_points.txt +0 -0
  290. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/licenses/LICENSE +0 -0
  291. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,415 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Centralized Map Formatting Utility
4
+
5
+ Single source of truth for all map formatting across the codebase.
6
+ """
7
+
8
+ from pokemon_env.enums import MetatileBehavior
9
+
10
+
11
# Ordered (keywords, symbol) rules checked before the jump-ledge rule.
# Order matters: the first rule with a keyword contained in the behavior
# name wins (e.g. a name containing both "DOOR" and "INDOOR" is a door).
_FEATURE_SYMBOL_RULES = (
    (("DOOR",), "D"),
    (("STAIRS", "WARP"), "S"),
    (("WATER",), "W"),
    (("TALL_GRASS",), "~"),
    (("COMPUTER", "PC"), "PC"),       # PC/Computer
    (("TELEVISION", "TV"), "T"),      # Television
    (("BOOKSHELF", "SHELF"), "B"),    # Bookshelf
    (("SIGN",), "?"),                 # Sign/Information ("SIGNPOST" contains "SIGN")
    (("FLOWER", "PLANT"), "F"),       # Flowers/Plants
    (("COUNTER", "DESK"), "C"),       # Counter/Desk
    (("BED", "SLEEP"), "="),          # Bed
    (("TABLE", "CHAIR"), "t"),        # Table/Chair
    (("CLOCK",), "O"),                # Clock (O for clock face)
    (("PICTURE", "PAINTING"), "^"),   # Picture/Painting on wall
    (("TRASH", "BIN"), "U"),          # Trash can/bin
    (("POT", "VASE"), "V"),           # Pot/Vase
    (("MACHINE", "DEVICE"), "M"),     # Machine/Device
)

# Jump-ledge arrows. Diagonals MUST come before the cardinals because
# "NORTHEAST" also contains "NORTH" and "EAST" (and so on); the cardinal
# order after that mirrors the original precedence.
_JUMP_DIRECTION_SYMBOLS = (
    ("NORTHEAST", "↗"),
    ("NORTHWEST", "↖"),
    ("SOUTHEAST", "↘"),
    ("SOUTHWEST", "↙"),
    ("SOUTH", "↓"),
    ("EAST", "→"),
    ("WEST", "←"),
    ("NORTH", "↑"),
)

# Ordered rules checked after the jump-ledge rule.
_FALLBACK_SYMBOL_RULES = (
    (("IMPASSABLE", "SEALED"), "#"),  # Blocked
    (("INDOOR",), "."),               # Indoor tiles are walkable
    (("DECORATION", "HOLDS"), "."),   # Decorations are walkable
)


def format_tile_to_symbol(tile):
    """
    Convert a single tile to its display symbol.

    Args:
        tile: Sequence of (tile_id, behavior, collision, elevation). Shorter
            sequences are accepted: with 2 or 3 fields the collision is taken
            as 0; with 0 or 1 fields (or None) the behavior defaults to
            MetatileBehavior.NORMAL. Fields beyond the fourth are ignored.
            ``behavior`` may be an object with a ``name`` attribute or an int
            convertible with ``MetatileBehavior(behavior)``.

    Returns:
        str: Symbol representing the tile (a single character, except "PC").
            Unrecognized behaviors are rendered as "#" (blocked) for safety.
    """
    if tile is None:
        tile = ()

    if len(tile) >= 4:
        # Slice instead of unpacking the whole tile so extra trailing fields
        # do not raise; elevation is not used.
        tile_id, behavior, collision = tile[:3]
    elif len(tile) >= 2:
        tile_id, behavior = tile[:2]
        collision = 0
    else:
        tile_id = tile[0] if tile else 0
        behavior = MetatileBehavior.NORMAL
        collision = 0

    # Resolve the behavior to its enum member name.
    if hasattr(behavior, 'name'):
        behavior_name = behavior.name
    elif isinstance(behavior, int):
        try:
            behavior_name = MetatileBehavior(behavior).name
        except ValueError:
            behavior_name = "UNKNOWN"
    else:
        behavior_name = "UNKNOWN"

    # tile_id 1023 (0x3FF) is ALWAYS invalid/out-of-bounds
    if tile_id == 1023:
        return "#"  # Always show as blocked/wall
    if behavior_name == "NORMAL":
        return "." if collision == 0 else "#"

    for keywords, symbol in _FEATURE_SYMBOL_RULES:
        if any(keyword in behavior_name for keyword in keywords):
            return symbol

    if "JUMP" in behavior_name:
        for direction, arrow in _JUMP_DIRECTION_SYMBOLS:
            if direction in behavior_name:
                return arrow
        return "J"

    for keywords, symbol in _FALLBACK_SYMBOL_RULES:
        if any(keyword in behavior_name for keyword in keywords):
            return symbol

    # For unknown behavior, mark as blocked for safety
    return "#"
111
+
112
+
113
def _npc_grid_positions(npcs, player_coords, center_x, center_y, width, height):
    """Return {(row, col): npc} for every NPC that falls inside the grid view."""
    if not npcs or not player_coords:
        return {}

    try:
        # Handle both tuple and dict formats for player_coords
        if isinstance(player_coords, dict):
            player_abs_x = player_coords.get('x', 0)
            player_abs_y = player_coords.get('y', 0)
        else:
            player_abs_x, player_abs_y = player_coords
        player_abs_x = int(player_abs_x) if player_abs_x is not None else 0
        player_abs_y = int(player_abs_y) if player_abs_y is not None else 0
    except (ValueError, TypeError) as e:
        # Without a usable player position no NPC can be placed.
        print(f"Warning: Failed to convert coordinates for NPC positioning: {e}")
        print(f" player_coords: {player_coords}")
        return {}

    positions = {}
    for npc in npcs:
        try:
            npc_abs_x = npc.get('current_x', 0)
            npc_abs_y = npc.get('current_y', 0)
            npc_abs_x = int(npc_abs_x) if npc_abs_x is not None else 0
            npc_abs_y = int(npc_abs_y) if npc_abs_y is not None else 0
        except (AttributeError, ValueError, TypeError) as e:
            # Skip only the malformed entry; the other NPCs are still drawn.
            print(f"Warning: Failed to convert coordinates for NPC positioning: {e}")
            print(f" npc: {npc!r}")
            continue

        # NPC coordinates are absolute; the player sits at the grid center,
        # so the NPC's cell is the center plus its offset from the player.
        grid_x = center_x + (npc_abs_x - player_abs_x)
        grid_y = center_y + (npc_abs_y - player_abs_y)
        if 0 <= grid_x < width and 0 <= grid_y < height:
            positions[(grid_y, grid_x)] = npc
    return positions


def _trim_wall_padding(grid):
    """Drop all-'#' rows at the top and bottom, keeping one wall row per side."""
    content_symbols = ('.', 'P', 'D', 'N', 'T', 'S')
    if not any(cell in content_symbols for row in grid for cell in row):
        return grid

    def leading_wall_rows(rows):
        count = 0
        for row in rows:
            if not all(cell == '#' for cell in row):
                break
            count += 1
        return count

    # At least one row holds a content symbol, so the top and bottom runs of
    # wall rows can never overlap.
    top_wall_rows = leading_wall_rows(grid)
    bottom_wall_rows = leading_wall_rows(reversed(grid))
    first_kept = max(top_wall_rows - 1, 0)
    last_kept = len(grid) - max(bottom_wall_rows - 1, 0)
    # Left/right columns are intentionally not trimmed (walls there may hold
    # doors or other features).
    return grid[first_kept:last_kept]


def format_map_grid(raw_tiles, player_facing="South", npcs=None, player_coords=None, trim_padding=True):
    """
    Format raw tile data into a traversability grid with NPCs.

    The player is drawn as "P" at the center cell of ``raw_tiles``. NPCs are
    drawn as "@" (``trainer_type`` > 0) or "N"; every other cell is rendered
    with ``format_tile_to_symbol``.

    Args:
        raw_tiles: 2D list of tile tuples
        player_facing: Accepted for interface compatibility; not used, the
            player is always rendered as "P"
        npcs: List of NPC/object event dicts with absolute ``current_x`` /
            ``current_y`` positions. Entries whose coordinates cannot be
            converted to int are skipped with a printed warning.
        player_coords: Player absolute coordinates, either an ``(x, y)`` pair
            or a dict with ``'x'`` / ``'y'`` keys. NPCs are only placed when
            this is provided.
        trim_padding: If True, remove surplus all-wall rows at the top and
            bottom (one wall row per side is kept)

    Returns:
        list: 2D list of symbol strings (empty list when there are no tiles)
    """
    if not raw_tiles:
        return []

    height = len(raw_tiles)
    width = len(raw_tiles[0])
    center_y = height // 2
    center_x = width // 2

    npc_positions = _npc_grid_positions(
        npcs, player_coords, center_x, center_y, width, height
    )

    grid = []
    for y, row in enumerate(raw_tiles):
        grid_row = []
        for x, tile in enumerate(row):
            if y == center_y and x == center_x:
                # Player position
                grid_row.append("P")
            elif (y, x) in npc_positions:
                # "or 0" tolerates a trainer_type that is present but None.
                trainer_type = npc_positions[(y, x)].get('trainer_type') or 0
                grid_row.append("@" if trainer_type > 0 else "N")
            else:
                grid_row.append(format_tile_to_symbol(tile))
        grid.append(grid_row)

    if trim_padding:
        grid = _trim_wall_padding(grid)

    return grid
249
+
250
+
251
def format_map_for_display(raw_tiles, player_facing="South", title="Map", npcs=None, player_coords=None):
    """
    Format raw tiles into a complete display string with headers and legend.

    Args:
        raw_tiles: 2D list of tile tuples
        player_facing: Player facing direction (forwarded to format_map_grid)
        title: Title for the map display
        npcs: List of NPC/object events with positions
        player_coords: Player absolute coordinates, either a dict
            {'x': x, 'y': y} or an (x, y) pair

    Returns:
        str: Formatted map display, or "<title>: No map data available" when
            raw_tiles is empty
    """
    if not raw_tiles:
        return f"{title}: No map data available"

    # format_map_grid accepts both dict and tuple coordinates (with defaults
    # for missing dict keys), so pass player_coords through unchanged rather
    # than indexing ['x'] / ['y'] here and risking a KeyError.
    grid = format_map_grid(raw_tiles, player_facing, npcs, player_coords)
    column_count = len(grid[0])

    lines = [f"{title} ({len(grid)}x{column_count}):", ""]

    # Pad the header and separator to the width of a row label so the column
    # numbers sit above their cells.
    label_padding = " " * len(f" {0:2}: ")
    lines.append(label_padding + "".join(f"{i:2} " for i in range(column_count)))
    lines.append(label_padding + "--" * column_count)

    # Add grid with row numbers
    for y, row in enumerate(grid):
        lines.append(f" {y:2}: " + " ".join(f"{cell:2}" for cell in row))

    # Add dynamic legend based on symbols that appear
    lines.append("")
    lines.append(generate_dynamic_legend(grid))

    return "\n".join(lines)
295
+
296
+
297
def get_symbol_legend():
    """
    Return the full legend used when rendering map displays.

    Returns:
        dict: Maps each map symbol to its human-readable description, in the
            order the entries are listed below
    """
    legend_entries = (
        # Player and terrain
        ("P", "Player"),
        (".", "Walkable path"),
        ("#", "Wall/Blocked/Unknown"),
        ("D", "Door"),
        ("S", "Stairs/Warp"),
        ("W", "Water"),
        ("~", "Tall grass"),
        # Interior objects
        ("PC", "PC/Computer"),
        ("T", "Television"),
        ("B", "Bookshelf"),
        # NOTE(review): format_tile_to_symbol emits "?" for sign tiles, but
        # the description here says "Unexplored area" - confirm which meaning
        # is intended.
        ("?", "Unexplored area"),
        ("F", "Flowers/Plants"),
        ("C", "Counter/Desk"),
        ("=", "Bed"),
        ("t", "Table/Chair"),
        ("O", "Clock"),
        ("^", "Picture/Painting"),
        ("U", "Trash can"),
        ("V", "Pot/Vase"),
        ("M", "Machine/Device"),
        # Jump ledges
        ("J", "Jump ledge"),
        ("↓", "Jump South"),
        ("↑", "Jump North"),
        ("←", "Jump West"),
        ("→", "Jump East"),
        ("↗", "Jump Northeast"),
        ("↖", "Jump Northwest"),
        ("↘", "Jump Southeast"),
        ("↙", "Jump Southwest"),
        # Characters
        ("N", "NPC"),
        ("@", "Trainer"),
    )
    return dict(legend_entries)
337
+
338
+
339
def generate_dynamic_legend(grid):
    """
    Build a legend covering only the symbols present in the grid.

    Symbols are grouped into fixed categories; a category is listed only when
    at least one of its symbols occurs in the grid and has an entry in
    get_symbol_legend().

    Args:
        grid: 2D list of symbol strings

    Returns:
        str: Legend text starting with "Legend:", or "" for an empty grid
    """
    if not grid:
        return ""

    descriptions = get_symbol_legend()

    # Every distinct symbol that occurs anywhere in the grid
    present = {cell for cells in grid for cell in cells}

    # Fixed category order and per-category symbol order for the output
    grouped_symbols = (
        ("Movement", ("P",)),
        ("Terrain", (".", "#", "W", "~", "?")),
        ("Structures", ("D", "S")),
        ("Jump ledges", ("J", "↓", "↑", "←", "→", "↗", "↖", "↘", "↙")),
        ("Furniture", ("PC", "T", "B", "F", "C", "=", "t", "O", "^", "U", "V", "M")),
        ("NPCs", ("N", "@")),
    )

    output = ["Legend:"]
    for heading, members in grouped_symbols:
        labelled = [
            f"{member}={descriptions[member]}"
            for member in members
            if member in present and member in descriptions
        ]
        if not labelled:
            continue
        output.append(f" {heading}: {', '.join(labelled)}")

    return "\n".join(output)
390
+
391
+
392
def format_map_for_llm(raw_tiles, player_facing="South", npcs=None, player_coords=None):
    """
    Render raw tiles as a bare symbol grid for LLM consumption.

    No headers or legend are added: each grid row becomes one line with its
    symbols separated by single spaces.

    Args:
        raw_tiles: 2D list of tile tuples
        player_facing: Player facing direction
        npcs: List of NPC/object events with positions
        player_coords: Tuple of (player_x, player_y) in absolute world coordinates

    Returns:
        str: Newline-joined grid text, or "No map data available" when
            raw_tiles is empty
    """
    # Guard clause: nothing to render
    if not raw_tiles:
        return "No map data available"

    symbol_rows = format_map_grid(raw_tiles, player_facing, npcs, player_coords)
    return "\n".join(" ".join(cells) for cells in symbol_rows)