synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (291)
  1. examples/multi_step/configs/README_verilog_rl.md +77 -0
  2. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  3. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  4. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  5. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  6. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +17 -5
  7. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  8. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  9. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  10. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  11. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  12. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  13. examples/multi_step/readme.md +48 -0
  14. examples/multi_step/verilog_rl_lora.md +218 -0
  15. examples/qwen_coder/configs/coder_lora_30b.toml +1 -1
  16. examples/sft/evaluate.py +2 -0
  17. examples/sft/generate_traces.py +2 -0
  18. examples/swe/task_app/grpo_swe_mini.py +56 -26
  19. examples/swe/task_app/hosted/rollout.py +42 -0
  20. examples/swe/task_app/hosted/test_service.py +5 -6
  21. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  22. examples/task_apps/TESTING.md +275 -0
  23. examples/task_apps/__init__.py +0 -0
  24. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  25. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  26. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  27. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  28. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  29. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  30. examples/task_apps/crafter/__init__.py +0 -0
  31. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  32. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  33. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  34. examples/task_apps/crafter/task_app/__init__.py +5 -0
  35. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +324 -21
  36. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
  37. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  38. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +76 -7
  39. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  40. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +25 -3
  41. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +77 -4
  42. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +117 -9
  43. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
  44. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +218 -0
  45. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  46. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  47. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  48. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  49. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  50. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  51. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  52. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  53. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  54. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  55. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  56. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  57. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  58. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  59. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  60. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  61. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  62. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  63. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  64. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  65. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  66. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  67. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  68. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  69. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  70. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  71. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  72. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  73. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  74. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  75. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  76. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  77. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  78. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  79. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  80. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  81. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  82. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  83. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  84. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  85. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  86. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  87. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  88. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  89. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  90. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  91. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  92. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  93. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  94. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  95. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  96. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  97. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  98. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  99. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  100. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  101. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  102. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  103. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  104. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  105. examples/task_apps/enron/__init__.py +1 -0
  106. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  107. examples/task_apps/enron/filter_sft.toml +5 -0
  108. examples/task_apps/enron/task_app/README.md +14 -0
  109. examples/task_apps/enron/task_app/__init__.py +1 -0
  110. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  111. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  112. examples/task_apps/enron/tests/__init__.py +4 -0
  113. examples/task_apps/enron/tests/conftest.py +115 -0
  114. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  115. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  116. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  117. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  118. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  119. examples/task_apps/math/__init__.py +0 -0
  120. examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
  121. examples/task_apps/pokemon_battle/__init__.py +2 -0
  122. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  123. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  124. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  125. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  126. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  127. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  128. examples/task_apps/pokemon_red/README.md +357 -0
  129. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  130. examples/task_apps/pokemon_red/__init__.py +3 -0
  131. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  132. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
  133. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  134. examples/task_apps/pokemon_red/task_app.py +799 -0
  135. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  136. examples/task_apps/sokoban/README.md +307 -0
  137. examples/task_apps/sokoban/__init__.py +3 -0
  138. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  139. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  140. examples/task_apps/sokoban/filter_sft.toml +5 -0
  141. examples/task_apps/sokoban/task_app.py +1058 -0
  142. examples/task_apps/sokoban/tests/__init__.py +4 -0
  143. examples/task_apps/sokoban/tests/conftest.py +113 -0
  144. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  145. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  146. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  147. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  148. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  149. examples/task_apps/verilog/__init__.py +1 -0
  150. examples/task_apps/verilog/eval_groq_qwen32b.toml +24 -0
  151. examples/task_apps/verilog/filter_sft.toml +5 -0
  152. examples/task_apps/verilog/task_app/README.md +12 -0
  153. examples/task_apps/verilog/task_app/__init__.py +1 -0
  154. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  155. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  156. examples/task_apps/verilog/tests/__init__.py +4 -0
  157. examples/task_apps/verilog/tests/conftest.py +115 -0
  158. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  159. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  160. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  161. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  162. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  163. examples/vlm/crafter_openai_vlm_agent.py +4 -4
  164. examples/vlm/run_crafter_vlm_benchmark.py +4 -4
  165. examples/warming_up_to_rl/groq_test.py +2 -0
  166. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  167. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  168. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  169. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  170. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  171. examples/workflows/__init__.py +0 -0
  172. examples/workflows/math_rl/__init__.py +0 -0
  173. examples/workflows/math_rl/download_dataset.py +80 -0
  174. synth_ai/__init__.py +2 -2
  175. synth_ai/api/models/supported.py +1 -0
  176. synth_ai/api/train/builders.py +25 -11
  177. synth_ai/api/train/cli.py +12 -6
  178. synth_ai/api/train/configs/__init__.py +10 -10
  179. synth_ai/api/train/configs/rl.py +5 -4
  180. synth_ai/api/train/configs/sft.py +4 -3
  181. synth_ai/api/train/env_resolver.py +5 -2
  182. synth_ai/api/train/supported_algos.py +10 -5
  183. synth_ai/api/train/utils.py +7 -4
  184. synth_ai/cli/__init__.py +48 -59
  185. synth_ai/cli/_modal_wrapper.py +3 -2
  186. synth_ai/cli/_storage.py +4 -3
  187. synth_ai/cli/_validate_task_app.py +11 -0
  188. synth_ai/cli/balance.py +4 -3
  189. synth_ai/cli/calc.py +2 -2
  190. synth_ai/cli/demo.py +14 -7
  191. synth_ai/cli/legacy_root_backup.py +1 -1
  192. synth_ai/cli/recent.py +1 -1
  193. synth_ai/cli/rl_demo.py +8 -7
  194. synth_ai/cli/root.py +0 -97
  195. synth_ai/cli/status.py +1 -1
  196. synth_ai/cli/task_apps.py +1922 -190
  197. synth_ai/cli/traces.py +1 -1
  198. synth_ai/cli/tui.py +57 -0
  199. synth_ai/cli/turso.py +1 -1
  200. synth_ai/cli/watch.py +1 -1
  201. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +29 -17
  202. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  203. synth_ai/environments/examples/enron/engine.py +7 -2
  204. synth_ai/environments/examples/enron/environment.py +68 -0
  205. synth_ai/environments/examples/red/engine.py +27 -0
  206. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  207. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  208. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  209. synth_ai/environments/examples/red/environment.py +60 -0
  210. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  211. synth_ai/environments/examples/verilog/engine.py +104 -12
  212. synth_ai/evals/client.py +58 -61
  213. synth_ai/jobs/client.py +16 -4
  214. synth_ai/judge_schemas.py +9 -9
  215. synth_ai/py.typed +0 -0
  216. synth_ai/task/__init__.py +24 -5
  217. synth_ai/task/apps/__init__.py +1 -0
  218. synth_ai/task/config.py +257 -0
  219. synth_ai/task/contracts.py +138 -39
  220. synth_ai/task/proxy.py +48 -56
  221. synth_ai/task/rubrics/__init__.py +56 -0
  222. synth_ai/task/rubrics/loaders.py +152 -0
  223. synth_ai/task/rubrics/models.py +57 -0
  224. synth_ai/task/rubrics/scoring.py +116 -0
  225. synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
  226. synth_ai/task/server.py +8 -7
  227. synth_ai/task/trace_correlation_helpers.py +315 -0
  228. synth_ai/task/validators.py +413 -6
  229. synth_ai/tracing_v3/abstractions.py +3 -3
  230. synth_ai/tracing_v3/decorators.py +7 -3
  231. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  232. synth_ai/tracing_v3/replica_sync.py +4 -4
  233. synth_ai/tracing_v3/serialization.py +5 -5
  234. synth_ai/tracing_v3/session_tracer.py +16 -6
  235. synth_ai/tracing_v3/storage/base.py +29 -29
  236. synth_ai/tracing_v3/storage/config.py +3 -3
  237. synth_ai/tracing_v3/trace_utils.py +317 -0
  238. synth_ai/tracing_v3/turso/daemon.py +8 -7
  239. synth_ai/tracing_v3/turso/native_manager.py +66 -43
  240. synth_ai/tracing_v3/utils.py +3 -3
  241. synth_ai/tui/__init__.py +5 -0
  242. synth_ai/tui/__main__.py +13 -0
  243. synth_ai/tui/cli/__init__.py +1 -0
  244. synth_ai/tui/cli/query_experiments.py +164 -0
  245. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  246. synth_ai/tui/dashboard.py +906 -0
  247. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/METADATA +4 -1
  248. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/RECORD +278 -126
  249. examples/agora_ex/README_MoE.md +0 -224
  250. examples/agora_ex/__init__.py +0 -7
  251. examples/agora_ex/agora_ex.py +0 -65
  252. examples/agora_ex/agora_ex_task_app.py +0 -590
  253. examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
  254. examples/agora_ex/reward_fn_grpo-human.py +0 -129
  255. examples/agora_ex/system_prompt_CURRENT.md +0 -63
  256. examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
  257. examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
  258. examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
  259. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +0 -62
  260. synth_ai/rubrics/__init__.py +0 -22
  261. synth_ai/task/rubrics.py +0 -219
  262. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
  263. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
  264. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
  265. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
  266. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
  267. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
  268. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
  269. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
  270. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
  271. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
  272. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
  273. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
  274. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
  275. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
  276. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
  277. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
  278. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
  279. /examples/{rl/task_app → task_apps/math}/README.md +0 -0
  280. /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
  281. /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
  282. /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
  283. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
  284. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
  285. /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
  286. /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
  287. /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
  288. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/WHEEL +0 -0
  289. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/entry_points.txt +0 -0
  290. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/licenses/LICENSE +0 -0
  291. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/top_level.txt +0 -0
@@ -2,6 +2,7 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ import json
5
6
  import logging
6
7
  import os
7
8
  import sys
@@ -11,11 +12,12 @@ from pathlib import Path
11
12
  from typing import Any
12
13
 
13
14
  from synth_ai.task.apps import ModalDeploymentConfig, TaskAppEntry, register_task_app
14
- from synth_ai.task.contracts import RolloutMetrics, RolloutRequest, RolloutResponse, TaskInfo
15
+ from synth_ai.task.contracts import RolloutMetrics, RolloutMode, RolloutRequest, RolloutResponse, TaskInfo
15
16
  from synth_ai.task.datasets import TaskDatasetRegistry, TaskDatasetSpec
16
17
  from synth_ai.task.json import to_jsonable # noqa: F401 (imported for side-effect compatibility)
17
18
  from synth_ai.task.rubrics import load_rubric
18
19
  from synth_ai.task.server import ProxyConfig, RubricBundle, TaskAppConfig
20
+ from synth_ai.task.validators import normalize_inference_url
19
21
  from synth_ai.task.tracing_utils import (
20
22
  build_tracer_factory,
21
23
  resolve_sft_output_dir,
@@ -24,6 +26,18 @@ from synth_ai.task.tracing_utils import (
24
26
  )
25
27
  from synth_ai.tracing_v3.session_tracer import SessionTracer
26
28
 
29
+ try:
30
+ from .synth_envs_hosted.utils import (
31
+ ensure_chat_completions_url,
32
+ extract_trace_correlation_id,
33
+ )
34
+ except Exception: # pragma: no cover - utils unavailable if optional deps missing
35
+ def ensure_chat_completions_url(raw_url, mode=None):
36
+ """Fallback to shared utility for URL normalization."""
37
+ return normalize_inference_url(raw_url) if raw_url else raw_url
38
+
39
+ def extract_trace_correlation_id(_raw_url):
40
+ return None
27
41
  logger = logging.getLogger(__name__)
28
42
 
29
43
  DEFAULT_ALIAS_OPS: list[str] = ["agent", "env"] * 10
@@ -68,7 +82,7 @@ def _resolve_repo_root() -> Path:
68
82
  def _resolve_task_app_root(repo_root: Path) -> Path:
69
83
  """Locate the task_app directory even when the module is copied to a temp mount."""
70
84
 
71
- preferred = (repo_root / "examples" / "warming_up_to_rl" / "task_app").resolve()
85
+ preferred = (repo_root / "examples" / "task_apps" / "crafter" / "task_app").resolve()
72
86
  if preferred.is_dir():
73
87
  return preferred
74
88
 
@@ -81,7 +95,7 @@ def _resolve_task_app_root(repo_root: Path) -> Path:
81
95
  if (candidate / "synth_envs_hosted").is_dir():
82
96
  return candidate
83
97
 
84
- fallback = Path("/opt/synth_ai_repo/examples/warming_up_to_rl/task_app")
98
+ fallback = Path("/opt/synth_ai_repo/examples/task_apps/crafter/task_app")
85
99
  if fallback.is_dir():
86
100
  return fallback.resolve()
87
101
 
@@ -95,6 +109,110 @@ SYNTH_ENVS_HOSTED_ROOT = (TASK_APP_ROOT / "synth_envs_hosted").resolve()
95
109
  EXAMPLES_ROOT = (REPO_ROOT / "examples").resolve()
96
110
  RUBRICS_ROOT = (EXAMPLES_ROOT / "multi_step" / "rubrics").resolve()
97
111
 
112
+ DEFAULT_OUTCOME_RUBRIC_DATA: dict[str, Any] = {
113
+ "version": "1",
114
+ "goal_text": (
115
+ "Reward episodes that climb the Crafter achievement ladder, stockpile key resources "
116
+ "(especially wood), and finish alive with clear understanding of any failure."
117
+ ),
118
+ "aggregation": "weighted_sum",
119
+ "criteria": [
120
+ {
121
+ "id": "achievement_progression",
122
+ "description": (
123
+ "Weigh achievements by tier: late-game unlocks (iron tools, furnace, armor) earn "
124
+ "the most, mid-tier crafting (stone tools, furnace prep) gets partial credit, early "
125
+ "tasks (collecting saplings/wood tools) only lightly scored."
126
+ ),
127
+ "weight": 0.35,
128
+ },
129
+ {
130
+ "id": "resource_stockpile",
131
+ "description": (
132
+ "Assess resource totals with emphasis on wood stores; high scores require abundant "
133
+ "wood plus supporting materials (stone, coal, iron) that signal readiness for "
134
+ "crafting."
135
+ ),
136
+ "weight": 0.2,
137
+ },
138
+ {
139
+ "id": "survival_state",
140
+ "description": (
141
+ "Reward finishing alive with healthy food/drink bars and safe positioning; penalize "
142
+ "deaths, low vitals, or lingering hazards at episode end."
143
+ ),
144
+ "weight": 0.2,
145
+ },
146
+ {
147
+ "id": "failure_analysis",
148
+ "description": (
149
+ "If the run ends in death or timeout, clearly identify the cause and deduct unless "
150
+ "the agent mitigated risk; highlight when the agent survives despite danger."
151
+ ),
152
+ "weight": 0.15,
153
+ },
154
+ {
155
+ "id": "future_readiness",
156
+ "description": (
157
+ "Describe how prepared the agent is for the next objectives (tools crafted, shelters, "
158
+ "furnaces, smelted materials) and whether the inventory supports further progress."
159
+ ),
160
+ "weight": 0.1,
161
+ },
162
+ ],
163
+ }
164
+
165
+ DEFAULT_EVENTS_RUBRIC_DATA: dict[str, Any] = {
166
+ "version": "1",
167
+ "goal_text": (
168
+ "Score each decision in proportion to the concrete Crafter achievement progress it "
169
+ "delivers, topping out the scale when the log shows a fresh achievement unlock and keeping "
170
+ "routine upkeep near zero."
171
+ ),
172
+ "aggregation": "weighted_sum",
173
+ "criteria": [
174
+ {
175
+ "id": "achievement_unlocks",
176
+ "description": (
177
+ "Assign 0.9-1.0 when the decision explicitly unlocks a new Crafter achievement (look "
178
+ 'for "Achievement unlocked" messages or equivalent deterministic completions such as '
179
+ "placing a furnace that immediately crafts ingots). Cap the score at 0.4 when no new "
180
+ "achievement fires, and drop to <=0.1 if the turn repeats known actions without "
181
+ "measurable progress."
182
+ ),
183
+ "weight": 0.55,
184
+ },
185
+ {
186
+ "id": "milestone_setup",
187
+ "description": (
188
+ "Give 0.5-0.7 when the action completes the last prerequisite for a specific upcoming "
189
+ "achievement (e.g., gathering the final ore before smelting, crafting sticks right "
190
+ "before a tool). Keep the score <=0.3 if the progress is speculative or still several "
191
+ "steps away."
192
+ ),
193
+ "weight": 0.2,
194
+ },
195
+ {
196
+ "id": "inventory_depth",
197
+ "description": (
198
+ "Reward 0.3-0.5 for pulls that clearly deepen critical buffers (fuel, food, ore) and "
199
+ "immediately unblock the next milestone. If resources are already plentiful or the "
200
+ "haul is generic filler, stay at <=0.2."
201
+ ),
202
+ "weight": 0.15,
203
+ },
204
+ {
205
+ "id": "execution_quality",
206
+ "description": (
207
+ "Only add up to 0.1 for clean, legal execution that avoids wasted turns; drop to 0.0 "
208
+ "whenever the agent idles, repeats failed moves, or takes damage without compensating "
209
+ "progress."
210
+ ),
211
+ "weight": 0.1,
212
+ },
213
+ ],
214
+ }
215
+
98
216
  for path in (REPO_ROOT, TASK_APP_ROOT, SYNTH_ENVS_HOSTED_ROOT, EXAMPLES_ROOT):
99
217
  try:
100
218
  resolved = path.resolve()
@@ -115,6 +233,28 @@ try:
115
233
  except Exception:
116
234
  pass
117
235
 
236
+ def _load_rubric_with_fallback(filename: str, fallback: dict[str, Any]):
237
+ """Load rubric from JSON file when available, otherwise use bundled fallback."""
238
+
239
+ search_paths = [RUBRICS_ROOT / filename, TASK_APP_ROOT / "rubrics" / filename]
240
+ for path in search_paths:
241
+ try:
242
+ if path.exists():
243
+ logger.debug("Loading rubric from %s", path)
244
+ return load_rubric(str(path))
245
+ except Exception as exc:
246
+ logger.warning("Failed to load rubric %s from %s: %s", filename, path, exc)
247
+
248
+ logger.warning("Falling back to inline rubric %s: file not available", filename)
249
+ try:
250
+ materialized = search_paths[0]
251
+ materialized.parent.mkdir(parents=True, exist_ok=True)
252
+ materialized.write_text(json.dumps(fallback, indent=2), encoding="utf-8")
253
+ except Exception:
254
+ logger.debug("Unable to materialize inline rubric %s", filename, exc_info=True)
255
+ return load_rubric(fallback)
256
+
257
+
118
258
  HAS_HOSTED = True
119
259
  try:
120
260
  import crafter # type: ignore
@@ -306,13 +446,16 @@ def build_dataset() -> tuple[TaskDatasetRegistry, CrafterDataset]:
306
446
  def _base_task_info(dataset: CrafterDataset) -> TaskInfo:
307
447
  return TaskInfo(
308
448
  task={"id": "crafter_classic", "name": "Crafter Classic", "version": "1.0.0"},
309
- environments=["crafter"],
449
+ environment="crafter",
310
450
  action_space={
311
451
  "type": "discrete",
452
+ "description": f"Discrete action space with {len(crafter_constants.actions)} actions including movement, crafting, and interaction",
312
453
  "size": len(crafter_constants.actions),
313
454
  "actions": list(crafter_constants.actions),
314
455
  },
315
456
  observation={
457
+ "type": "dict",
458
+ "description": "RGB frame (64x64x3) plus inventory counts, achievements, and semantic map patches",
316
459
  "summary": "RGB frame plus inventory, achievements, and semantic map patches.",
317
460
  "keys": ["image", "inventory", "achievements", "semantic_map_patch7"],
318
461
  "image_shape": [64, 64, 3],
@@ -336,18 +479,17 @@ def _base_task_info(dataset: CrafterDataset) -> TaskInfo:
336
479
  },
337
480
  "tool": {"name": "interact", "parallel_tool_calls": False},
338
481
  },
339
- capabilities={
340
- "supports_rollout": True,
341
- "supports_env_lifecycle": True,
342
- "requires_api_key_header": True,
343
- },
344
482
  limits={"max_ops": 100000, "max_time_s": 3600},
345
483
  )
346
484
 
347
485
 
348
- OUTCOME_RUBRIC = load_rubric(str(RUBRICS_ROOT / "crafter_outcome_rubric.json"))
486
+ OUTCOME_RUBRIC = _load_rubric_with_fallback(
487
+ "crafter_outcome_rubric.json", DEFAULT_OUTCOME_RUBRIC_DATA
488
+ )
349
489
 
350
- EVENTS_RUBRIC = load_rubric(str(RUBRICS_ROOT / "crafter_events_rubric.json"))
490
+ EVENTS_RUBRIC = _load_rubric_with_fallback(
491
+ "crafter_events_rubric.json", DEFAULT_EVENTS_RUBRIC_DATA
492
+ )
351
493
 
352
494
 
353
495
  def describe_taskset(dataset: CrafterDataset) -> dict[str, Any]:
@@ -366,29 +508,36 @@ def provide_task_instances(
366
508
  dataset: CrafterDataset, base_info: TaskInfo, seeds: Sequence[int]
367
509
  ) -> Iterable[TaskInfo]:
368
510
  infos: list[TaskInfo] = []
511
+ base_observation = getattr(base_info, "observation", None)
512
+ if hasattr(base_observation, "model_dump"):
513
+ observation_template = base_observation.model_dump()
514
+ elif isinstance(base_observation, dict):
515
+ observation_template = dict(base_observation)
516
+ else:
517
+ observation_template = {}
518
+
369
519
  for seed_value in seeds:
370
520
  summary = dataset.describe_seed(seed_value)
371
521
  infos.append(
372
522
  TaskInfo(
373
523
  task=base_info.task,
374
- environments=base_info.environments,
524
+ environment=base_info.environment,
375
525
  action_space=base_info.action_space,
376
526
  observation={
377
- **base_info.observation,
527
+ **observation_template,
378
528
  "seed": seed_value,
379
529
  "traits": summary["traits"],
380
530
  "inventory": summary["inventory"],
381
531
  "player_position": summary["player_position"],
382
532
  },
383
533
  dataset={
384
- **base_info.dataset,
534
+ **base_info.dataset.model_dump(),
385
535
  "seed": seed_value,
386
536
  "difficulty": summary["difficulty"],
387
537
  "config": summary["config"],
388
538
  },
389
539
  rubric=base_info.rubric,
390
540
  inference=base_info.inference,
391
- capabilities=base_info.capabilities,
392
541
  limits=base_info.limits,
393
542
  )
394
543
  )
@@ -488,9 +637,94 @@ def _coerce_math_to_crafter(request: RolloutRequest) -> RolloutRequest:
488
637
  return coerced
489
638
 
490
639
 
640
+ def _resolve_trace_correlation_id(policy_cfg: dict[str, Any], mode: Any = None) -> str | None:
641
+ """Best-effort extraction of the trace correlation identifier."""
642
+ candidates: list[Any] = [
643
+ policy_cfg.get("trace_correlation_id"),
644
+ policy_cfg.get("trace"),
645
+ ]
646
+ logger.debug(
647
+ "_resolve_trace_correlation_id: inspecting policy_cfg keys=%s candidates=%s",
648
+ sorted(policy_cfg.keys()),
649
+ candidates,
650
+ )
651
+ for candidate in candidates:
652
+ if isinstance(candidate, str):
653
+ stripped = candidate.strip()
654
+ if stripped:
655
+ return stripped
656
+
657
+ return extract_trace_correlation_id(policy_cfg.get("inference_url"), mode=mode)
658
+
659
+
491
660
  async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutResponse:
661
+ request = _coerce_math_to_crafter(request)
662
+
663
+ policy_cfg = dict(request.policy.config or {})
664
+ logger.info(
665
+ "ROLLOUT_EXEC: incoming policy config keys=%s inference_url=%s run_id=%s mode=%s",
666
+ sorted(policy_cfg.keys()),
667
+ policy_cfg.get("inference_url"),
668
+ request.run_id,
669
+ request.mode,
670
+ )
671
+ inferred_url = ensure_chat_completions_url(policy_cfg.get("inference_url"), mode=request.mode)
672
+ if isinstance(inferred_url, str) and inferred_url:
673
+ if inferred_url != policy_cfg.get("inference_url"):
674
+ logger.warning(
675
+ "ROLLOUT_EXEC: normalized inference_url run_id=%s from %s to %s",
676
+ request.run_id,
677
+ policy_cfg.get("inference_url"),
678
+ inferred_url,
679
+ )
680
+ policy_cfg["inference_url"] = inferred_url
681
+ else:
682
+ logger.warning(
683
+ "ROLLOUT_EXEC: inference_url missing or not normalized run_id=%s raw=%s",
684
+ request.run_id,
685
+ policy_cfg.get("inference_url"),
686
+ )
687
+
688
+ trace_correlation_id = _resolve_trace_correlation_id(policy_cfg, mode=request.mode)
689
+
690
+ # ASSERTION: trace_correlation_id MUST be present for RL mode (but not EVAL mode)
691
+ if request.mode == RolloutMode.RL:
692
+ assert trace_correlation_id is not None, (
693
+ f"FATAL: trace_correlation_id extraction failed for run_id={request.run_id}. "
694
+ f"policy_cfg_keys={sorted(policy_cfg.keys())} "
695
+ f"inference_url={policy_cfg.get('inference_url')}"
696
+ )
697
+ assert isinstance(trace_correlation_id, str) and trace_correlation_id.strip(), (
698
+ f"FATAL: trace_correlation_id is empty for run_id={request.run_id}. "
699
+ f"Got: {trace_correlation_id!r}"
700
+ )
701
+
702
+ if trace_correlation_id:
703
+ policy_cfg["trace_correlation_id"] = trace_correlation_id
704
+ logger.info(
705
+ "ROLLOUT_EXEC: resolved trace_correlation_id=%s run_id=%s",
706
+ trace_correlation_id,
707
+ request.run_id,
708
+ )
709
+
710
+ pipeline_metadata: dict[str, Any] = {}
711
+ if trace_correlation_id:
712
+ pipeline_metadata["trace_correlation_id"] = trace_correlation_id
713
+ if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
714
+ pipeline_metadata.setdefault("inference_url", policy_cfg["inference_url"])
715
+ logger.info(
716
+ "ROLLOUT_EXEC: pipeline metadata prepared run_id=%s metadata=%s",
717
+ request.run_id,
718
+ pipeline_metadata,
719
+ )
720
+
492
721
  # If hosted env service code is not bundled, return a no-op rollout response compatible with contracts
493
722
  if not HAS_HOSTED:
723
+ logger.warning(
724
+ "ROLLOUT_EXEC: HAS_HOSTED disabled, returning stub response run_id=%s metadata=%s",
725
+ request.run_id,
726
+ pipeline_metadata,
727
+ )
494
728
  return RolloutResponse(
495
729
  run_id=request.run_id,
496
730
  trajectories=[],
@@ -505,11 +739,10 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
505
739
  aborted=False,
506
740
  ops_executed=0,
507
741
  trace=None,
742
+ trace_correlation_id=trace_correlation_id or f"trace_{request.run_id}",
743
+ pipeline_metadata=pipeline_metadata,
508
744
  )
509
745
 
510
- request = _coerce_math_to_crafter(request)
511
-
512
- policy_cfg = dict(request.policy.config or {})
513
746
  try:
514
747
  max_llm_calls = int(policy_cfg.get("max_llm_calls") or 10)
515
748
  except Exception:
@@ -540,6 +773,7 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
540
773
  converted_ops = converted_ops[:max_ops_allowed]
541
774
  legacy_request = LegacyRolloutRequest(
542
775
  run_id=request.run_id,
776
+ mode=request.mode, # Preserve mode for nested requests
543
777
  env=LegacyRolloutEnvSpec(
544
778
  env_id=request.env.env_id,
545
779
  env_name=request.env.env_name,
@@ -563,12 +797,79 @@ async def rollout_executor(request: RolloutRequest, fastapi_request) -> RolloutR
563
797
  legacy_response: LegacyRolloutResponse = await legacy_execute_rollout(
564
798
  legacy_request, fastapi_request
565
799
  )
800
+ logger.info(
801
+ "ROLLOUT_EXEC: legacy rollout completed run_id=%s trace_id=%s",
802
+ request.run_id,
803
+ trace_correlation_id,
804
+ )
566
805
  data = legacy_response.model_dump()
567
806
  metrics = data.get("metrics", {}) or {}
568
807
  metrics.setdefault("outcome_score", None)
569
808
  metrics.setdefault("events_score", None)
570
809
  metrics.setdefault("details", {})
571
810
  data["metrics"] = metrics
811
+
812
+ # Add trace_correlation_id at TOP-LEVEL (REQUIRED for RL training pipeline)
813
+ # Use fallback if somehow missing
814
+ data["trace_correlation_id"] = trace_correlation_id or f"trace_{request.run_id}"
815
+
816
+ # Add trace_correlation_id to pipeline_metadata
817
+ existing_meta = data.get("pipeline_metadata")
818
+ if not isinstance(existing_meta, dict):
819
+ existing_meta = {}
820
+ # ALWAYS set trace_correlation_id (use fallback if needed)
821
+ final_cid = trace_correlation_id or f"trace_{request.run_id}"
822
+ existing_meta["trace_correlation_id"] = final_cid
823
+ if isinstance(policy_cfg.get("inference_url"), str) and policy_cfg["inference_url"]:
824
+ existing_meta.setdefault("inference_url", policy_cfg["inference_url"])
825
+ data["pipeline_metadata"] = existing_meta
826
+
827
+ # Add trace_correlation_id to each trajectory (required for RL training pipeline)
828
+ if "trajectories" in data:
829
+ for traj in data.get("trajectories", []):
830
+ if isinstance(traj, dict):
831
+ traj["trace_correlation_id"] = final_cid
832
+ logger.info(
833
+ "ROLLOUT_EXEC: final pipeline metadata run_id=%s metadata=%s",
834
+ request.run_id,
835
+ existing_meta,
836
+ )
837
+ if trace_correlation_id and existing_meta.get("trace_correlation_id") != trace_correlation_id:
838
+ logger.error(
839
+ "ROLLOUT_EXEC: metadata trace mismatch run_id=%s expected=%s actual=%s",
840
+ request.run_id,
841
+ trace_correlation_id,
842
+ existing_meta.get("trace_correlation_id"),
843
+ )
844
+ if not existing_meta.get("trace_correlation_id"):
845
+ logger.error(
846
+ "ROLLOUT_EXEC: final metadata missing trace_correlation_id run_id=%s metadata=%s",
847
+ request.run_id,
848
+ existing_meta,
849
+ )
850
+
851
+ # ASSERTION: Verify trace_correlation_id is present in response at all required levels
852
+ assert "trace_correlation_id" in data, (
853
+ f"FATAL: trace_correlation_id missing from top-level response data for run_id={request.run_id}. "
854
+ f"Keys: {list(data.keys())}"
855
+ )
856
+ assert data["trace_correlation_id"] == final_cid, (
857
+ f"FATAL: trace_correlation_id mismatch in response for run_id={request.run_id}. "
858
+ f"Expected: {final_cid!r}, Got: {data.get('trace_correlation_id')!r}"
859
+ )
860
+ assert "pipeline_metadata" in data, (
861
+ f"FATAL: pipeline_metadata missing from response for run_id={request.run_id}"
862
+ )
863
+ assert data["pipeline_metadata"].get("trace_correlation_id") == final_cid, (
864
+ f"FATAL: trace_correlation_id missing or mismatched in pipeline_metadata for run_id={request.run_id}. "
865
+ f"Expected: {final_cid!r}, Got: {data['pipeline_metadata'].get('trace_correlation_id')!r}"
866
+ )
867
+ logger.info(
868
+ "ROLLOUT_EXEC: assertions passed - trace_correlation_id present in response run_id=%s cid=%s",
869
+ request.run_id,
870
+ final_cid,
871
+ )
872
+
572
873
  return RolloutResponse.model_validate(data)
573
874
 
574
875
 
@@ -612,7 +913,7 @@ def build_config() -> TaskAppConfig:
612
913
  routers: tuple = (environment_router, policy_router, branching_router) if HAS_HOSTED else ()
613
914
 
614
915
  config = TaskAppConfig(
615
- app_id="grpo-crafter",
916
+ app_id="grpo-crafter-task-app",
616
917
  name="GRPO Crafter Task App",
617
918
  description="Crafter Classic environment with GRPO task endpoints and LLM proxies.",
618
919
  base_task_info=base_info,
@@ -633,7 +934,7 @@ def build_config() -> TaskAppConfig:
633
934
 
634
935
  register_task_app(
635
936
  entry=TaskAppEntry(
636
- app_id="grpo-crafter",
937
+ app_id="grpo-crafter-task-app",
637
938
  description="Crafter Classic task app with rollout + proxy endpoints",
638
939
  config_factory=build_config,
639
940
  aliases=("crafter", "crafter-task"),
@@ -659,7 +960,9 @@ register_task_app(
659
960
  # Mount repo root so local modules resolve when deployed on Modal
660
961
  (str(REPO_ROOT), "/opt/synth_ai_repo"),
661
962
  (str(REPO_ROOT / "synth_ai"), "/opt/synth_ai_repo/synth_ai"),
662
- (str(TASK_APP_ROOT), "/opt/synth_ai_repo/examples/warming_up_to_rl/task_app"),
963
+ (str(TASK_APP_ROOT), "/opt/synth_ai_repo/examples/task_apps/crafter/task_app"),
964
+ # Explicitly mount rubrics directory
965
+ (str(RUBRICS_ROOT), "/opt/synth_ai_repo/examples/multi_step/rubrics"),
663
966
  ),
664
967
  secret_names=("groq-api-key", "openai-api-key"),
665
968
  memory=16384,
@@ -1,7 +1,7 @@
1
1
  """Compatibility wrapper for the GRPO Crafter task app.
2
2
 
3
3
  This module now delegates to the TaskAppConfig defined in the colocated example at
4
- `examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
4
+ `examples/task_apps/crafter/task_app/grpo_crafter.py`. It is kept for legacy usage
5
5
  (running the file directly or targeting `fastapi_app` from external tooling). Prefer using
6
6
  `uvx synth-ai serve grpo-crafter` for local development and testing.
7
7
  """
@@ -209,6 +209,16 @@ class CrafterEnvironmentWrapper:
209
209
  logger.info("No valid actions provided, defaulting to noop")
210
210
  normalized.append(EnvToolCall(tool="interact", args={"action": 0})) # noop action
211
211
 
212
+ # Limit to first 20 actions to prevent spam from overly long tool calls
213
+ MAX_ACTIONS_PER_STEP = 20
214
+ if len(normalized) > MAX_ACTIONS_PER_STEP:
215
+ logger.warning(
216
+ "Tool call contained %d actions, limiting to first %d to prevent spam",
217
+ len(normalized),
218
+ MAX_ACTIONS_PER_STEP,
219
+ )
220
+ normalized = normalized[:MAX_ACTIONS_PER_STEP]
221
+
212
222
  # Pre-step logging: capture current public state and print concise summary
213
223
  before_state: dict[str, Any] | None = None
214
224
  try:
@@ -44,6 +44,8 @@ class CrafterPolicy(Policy):
44
44
  self.inference_url = inference_url
45
45
  self.model = model
46
46
  self.use_tools = True
47
+ self.use_vision = False # Enable vision for VLMs
48
+ self.image_only_mode = False # If True, only send images without text observations
47
49
  # Sampling parameters (populated via initialize(config))
48
50
  self.temperature: float | None = None
49
51
  self.top_p: float | None = None
@@ -63,6 +65,16 @@ class CrafterPolicy(Policy):
63
65
  self.model = config["model"]
64
66
  if "use_tools" in config:
65
67
  self.use_tools = bool(config["use_tools"])
68
+ if "use_vision" in config:
69
+ self.use_vision = bool(config["use_vision"])
70
+ if "image_only_mode" in config:
71
+ self.image_only_mode = bool(config["image_only_mode"])
72
+ # If image_only_mode is enabled, automatically enable vision
73
+ if self.image_only_mode:
74
+ self.use_vision = True
75
+ # Auto-detect vision capability from model name if not explicitly set
76
+ if "use_vision" not in config and self.model:
77
+ self.use_vision = self._is_vision_model(self.model)
66
78
  # Adopt sampling params from policy config (trainer passes these through)
67
79
  if "temperature" in config:
68
80
  self.temperature = float(config["temperature"]) # fail fast on bad types
@@ -384,6 +396,7 @@ class CrafterPolicy(Policy):
384
396
  "inference_url": self.inference_url,
385
397
  "model": self.model,
386
398
  "use_tools": self.use_tools,
399
+ "use_vision": self.use_vision,
387
400
  },
388
401
  "state": self.state_dict(),
389
402
  }
@@ -396,7 +409,8 @@ class CrafterPolicy(Policy):
396
409
  inference_url=config["inference_url"],
397
410
  model=config.get("model"),
398
411
  )
399
- policy.use_tools = bool(config["use_tools"])
412
+ policy.use_tools = bool(config.get("use_tools", True))
413
+ policy.use_vision = bool(config.get("use_vision", False))
400
414
  policy.load_state_dict(state)
401
415
  return policy
402
416
 
@@ -409,14 +423,21 @@ class CrafterPolicy(Policy):
409
423
  """Prepare an inference request (implementing abstract method)."""
410
424
  # Format observation with rich contextual information
411
425
  observation_text = self._format_observation_for_llm(observation)
412
- image_parts = self._extract_image_parts(observation)
413
-
414
- # Build messages (observation_text already formatted; no raw matrices)
426
+
427
+ # Extract image parts based on vision settings
428
+ if self.use_vision:
429
+ image_parts = self._extract_image_parts(observation)
430
+ else:
431
+ # Text-only mode: don't include any images
432
+ image_parts = []
433
+
434
+ # Build messages with appropriate mode
415
435
  messages = CrafterReActAgent.build_messages(
416
436
  observation=observation_text,
417
437
  history=history,
418
438
  turn=self.turn_index,
419
439
  image_parts=image_parts,
440
+ image_only_mode=self.image_only_mode,
420
441
  )
421
442
 
422
443
  # Return messages and tools schema
@@ -446,12 +467,60 @@ class CrafterPolicy(Policy):
446
467
 
447
468
  return format_observation(obs_data, step_count=step_idx, max_steps=max_steps)
448
469
 
470
+ @staticmethod
471
+ def _is_vision_model(model_name: str) -> bool:
472
+ """Check if a model supports vision/image inputs based on its name."""
473
+ if not model_name:
474
+ return False
475
+
476
+ model_lower = model_name.lower()
477
+
478
+ # Known vision-capable model patterns
479
+ vision_patterns = [
480
+ "gpt-4o", # GPT-4o series
481
+ "gpt-4-turbo", # GPT-4 Turbo with vision
482
+ "gpt-4-vision", # Explicit vision variant
483
+ "gpt-5", # GPT-5 series (all variants support vision)
484
+ "claude-3", # All Claude 3 models support vision
485
+ "gemini", # Gemini models
486
+ "qwen-vl", # Qwen Vision-Language models
487
+ "qwen2-vl", # Qwen2 VL
488
+ "pixtral", # Mistral's vision model
489
+ "llava", # LLaVA models
490
+ "phi-3-vision", # Microsoft Phi-3 Vision
491
+ "internvl", # InternVL models
492
+ "cogvlm", # CogVLM models
493
+ "vision", # Generic vision indicator
494
+ ]
495
+
496
+ return any(pattern in model_lower for pattern in vision_patterns)
497
+
449
498
  def _extract_image_parts(
450
499
  self, observation: dict[str, Any] | None
451
500
  ) -> list[dict[str, Any]]:
452
- """Crafter policy uses text-only prompts; do not attach image parts."""
453
-
454
- return []
501
+ """Extract image parts from crafter observation for vision-capable models.
502
+
503
+ Returns OpenAI-style image_url format if vision is enabled and image data is available.
504
+ """
505
+ # Only extract images if vision is enabled for this policy
506
+ if not self.use_vision:
507
+ return []
508
+
509
+ if not observation:
510
+ return []
511
+
512
+ # Get the observation data (could be nested)
513
+ obs = observation.get("observation", observation)
514
+ if not isinstance(obs, dict):
515
+ return []
516
+
517
+ # Extract the data URL (includes base64-encoded image)
518
+ data_url = obs.get("observation_image_data_url")
519
+ if not data_url or not isinstance(data_url, str):
520
+ return []
521
+
522
+ # Return OpenAI-style image_url format
523
+ return [{"type": "image_url", "image_url": {"url": data_url}}]
455
524
 
456
525
  def parse_model_response(
457
526
  self, response: str, observation: dict[str, Any]