synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (291) hide show
  1. examples/multi_step/configs/README_verilog_rl.md +77 -0
  2. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  3. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  4. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  5. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  6. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +17 -5
  7. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  8. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  9. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  10. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  11. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  12. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  13. examples/multi_step/readme.md +48 -0
  14. examples/multi_step/verilog_rl_lora.md +218 -0
  15. examples/qwen_coder/configs/coder_lora_30b.toml +1 -1
  16. examples/sft/evaluate.py +2 -0
  17. examples/sft/generate_traces.py +2 -0
  18. examples/swe/task_app/grpo_swe_mini.py +56 -26
  19. examples/swe/task_app/hosted/rollout.py +42 -0
  20. examples/swe/task_app/hosted/test_service.py +5 -6
  21. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  22. examples/task_apps/TESTING.md +275 -0
  23. examples/task_apps/__init__.py +0 -0
  24. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  25. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  26. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  27. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  28. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  29. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  30. examples/task_apps/crafter/__init__.py +0 -0
  31. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  32. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  33. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  34. examples/task_apps/crafter/task_app/__init__.py +5 -0
  35. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +324 -21
  36. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
  37. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  38. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +76 -7
  39. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  40. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +25 -3
  41. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +77 -4
  42. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +117 -9
  43. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
  44. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +218 -0
  45. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  46. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  47. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  48. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  49. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  50. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  51. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  52. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  53. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  54. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  55. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  56. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  57. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  58. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  59. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  60. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  61. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  62. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  63. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  64. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  65. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  66. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  67. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  68. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  69. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  70. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  71. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  72. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  73. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  74. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  75. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  76. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  77. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  78. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  79. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  80. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  81. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  82. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  83. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  84. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  85. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  86. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  87. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  88. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  89. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  90. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  91. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  92. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  93. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  94. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  95. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  96. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  97. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  98. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  99. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  100. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  101. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  102. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  103. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  104. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  105. examples/task_apps/enron/__init__.py +1 -0
  106. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  107. examples/task_apps/enron/filter_sft.toml +5 -0
  108. examples/task_apps/enron/task_app/README.md +14 -0
  109. examples/task_apps/enron/task_app/__init__.py +1 -0
  110. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  111. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  112. examples/task_apps/enron/tests/__init__.py +4 -0
  113. examples/task_apps/enron/tests/conftest.py +115 -0
  114. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  115. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  116. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  117. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  118. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  119. examples/task_apps/math/__init__.py +0 -0
  120. examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
  121. examples/task_apps/pokemon_battle/__init__.py +2 -0
  122. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  123. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  124. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  125. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  126. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  127. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  128. examples/task_apps/pokemon_red/README.md +357 -0
  129. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  130. examples/task_apps/pokemon_red/__init__.py +3 -0
  131. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  132. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
  133. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  134. examples/task_apps/pokemon_red/task_app.py +799 -0
  135. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  136. examples/task_apps/sokoban/README.md +307 -0
  137. examples/task_apps/sokoban/__init__.py +3 -0
  138. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  139. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  140. examples/task_apps/sokoban/filter_sft.toml +5 -0
  141. examples/task_apps/sokoban/task_app.py +1058 -0
  142. examples/task_apps/sokoban/tests/__init__.py +4 -0
  143. examples/task_apps/sokoban/tests/conftest.py +113 -0
  144. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  145. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  146. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  147. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  148. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  149. examples/task_apps/verilog/__init__.py +1 -0
  150. examples/task_apps/verilog/eval_groq_qwen32b.toml +24 -0
  151. examples/task_apps/verilog/filter_sft.toml +5 -0
  152. examples/task_apps/verilog/task_app/README.md +12 -0
  153. examples/task_apps/verilog/task_app/__init__.py +1 -0
  154. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  155. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  156. examples/task_apps/verilog/tests/__init__.py +4 -0
  157. examples/task_apps/verilog/tests/conftest.py +115 -0
  158. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  159. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  160. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  161. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  162. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  163. examples/vlm/crafter_openai_vlm_agent.py +4 -4
  164. examples/vlm/run_crafter_vlm_benchmark.py +4 -4
  165. examples/warming_up_to_rl/groq_test.py +2 -0
  166. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  167. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  168. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  169. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  170. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  171. examples/workflows/__init__.py +0 -0
  172. examples/workflows/math_rl/__init__.py +0 -0
  173. examples/workflows/math_rl/download_dataset.py +80 -0
  174. synth_ai/__init__.py +2 -2
  175. synth_ai/api/models/supported.py +1 -0
  176. synth_ai/api/train/builders.py +25 -11
  177. synth_ai/api/train/cli.py +12 -6
  178. synth_ai/api/train/configs/__init__.py +10 -10
  179. synth_ai/api/train/configs/rl.py +5 -4
  180. synth_ai/api/train/configs/sft.py +4 -3
  181. synth_ai/api/train/env_resolver.py +5 -2
  182. synth_ai/api/train/supported_algos.py +10 -5
  183. synth_ai/api/train/utils.py +7 -4
  184. synth_ai/cli/__init__.py +48 -59
  185. synth_ai/cli/_modal_wrapper.py +3 -2
  186. synth_ai/cli/_storage.py +4 -3
  187. synth_ai/cli/_validate_task_app.py +11 -0
  188. synth_ai/cli/balance.py +4 -3
  189. synth_ai/cli/calc.py +2 -2
  190. synth_ai/cli/demo.py +14 -7
  191. synth_ai/cli/legacy_root_backup.py +1 -1
  192. synth_ai/cli/recent.py +1 -1
  193. synth_ai/cli/rl_demo.py +8 -7
  194. synth_ai/cli/root.py +0 -97
  195. synth_ai/cli/status.py +1 -1
  196. synth_ai/cli/task_apps.py +1922 -190
  197. synth_ai/cli/traces.py +1 -1
  198. synth_ai/cli/tui.py +57 -0
  199. synth_ai/cli/turso.py +1 -1
  200. synth_ai/cli/watch.py +1 -1
  201. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +29 -17
  202. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  203. synth_ai/environments/examples/enron/engine.py +7 -2
  204. synth_ai/environments/examples/enron/environment.py +68 -0
  205. synth_ai/environments/examples/red/engine.py +27 -0
  206. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  207. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  208. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  209. synth_ai/environments/examples/red/environment.py +60 -0
  210. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  211. synth_ai/environments/examples/verilog/engine.py +104 -12
  212. synth_ai/evals/client.py +58 -61
  213. synth_ai/jobs/client.py +16 -4
  214. synth_ai/judge_schemas.py +9 -9
  215. synth_ai/py.typed +0 -0
  216. synth_ai/task/__init__.py +24 -5
  217. synth_ai/task/apps/__init__.py +1 -0
  218. synth_ai/task/config.py +257 -0
  219. synth_ai/task/contracts.py +138 -39
  220. synth_ai/task/proxy.py +48 -56
  221. synth_ai/task/rubrics/__init__.py +56 -0
  222. synth_ai/task/rubrics/loaders.py +152 -0
  223. synth_ai/task/rubrics/models.py +57 -0
  224. synth_ai/task/rubrics/scoring.py +116 -0
  225. synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
  226. synth_ai/task/server.py +8 -7
  227. synth_ai/task/trace_correlation_helpers.py +315 -0
  228. synth_ai/task/validators.py +413 -6
  229. synth_ai/tracing_v3/abstractions.py +3 -3
  230. synth_ai/tracing_v3/decorators.py +7 -3
  231. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  232. synth_ai/tracing_v3/replica_sync.py +4 -4
  233. synth_ai/tracing_v3/serialization.py +5 -5
  234. synth_ai/tracing_v3/session_tracer.py +16 -6
  235. synth_ai/tracing_v3/storage/base.py +29 -29
  236. synth_ai/tracing_v3/storage/config.py +3 -3
  237. synth_ai/tracing_v3/trace_utils.py +317 -0
  238. synth_ai/tracing_v3/turso/daemon.py +8 -7
  239. synth_ai/tracing_v3/turso/native_manager.py +66 -43
  240. synth_ai/tracing_v3/utils.py +3 -3
  241. synth_ai/tui/__init__.py +5 -0
  242. synth_ai/tui/__main__.py +13 -0
  243. synth_ai/tui/cli/__init__.py +1 -0
  244. synth_ai/tui/cli/query_experiments.py +164 -0
  245. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  246. synth_ai/tui/dashboard.py +906 -0
  247. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/METADATA +4 -1
  248. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/RECORD +278 -126
  249. examples/agora_ex/README_MoE.md +0 -224
  250. examples/agora_ex/__init__.py +0 -7
  251. examples/agora_ex/agora_ex.py +0 -65
  252. examples/agora_ex/agora_ex_task_app.py +0 -590
  253. examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
  254. examples/agora_ex/reward_fn_grpo-human.py +0 -129
  255. examples/agora_ex/system_prompt_CURRENT.md +0 -63
  256. examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
  257. examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
  258. examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
  259. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +0 -62
  260. synth_ai/rubrics/__init__.py +0 -22
  261. synth_ai/task/rubrics.py +0 -219
  262. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
  263. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
  264. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
  265. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
  266. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
  267. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
  268. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
  269. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
  270. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
  271. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
  272. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
  273. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
  274. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
  275. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
  276. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
  277. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
  278. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
  279. /examples/{rl/task_app → task_apps/math}/README.md +0 -0
  280. /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
  281. /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
  282. /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
  283. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
  284. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
  285. /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
  286. /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
  287. /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
  288. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/WHEEL +0 -0
  289. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/entry_points.txt +0 -0
  290. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/licenses/LICENSE +0 -0
  291. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,48 @@
1
+
2
+
3
+ Crafter
4
+
5
+ cd /Users/joshpurtell/Documents/GitHub/synth-ai && uvx synth-ai modal-serve grpo-crafter-task-app --name grpo-crafter-task-app --env-file /Users/joshpurtell/Documents/GitHub/monorepo/environments/crafter/.env
6
+
7
+ cd /Users/joshpurtell/Documents/GitHub/monorepo && uv run modal deploy backend/app/routes/clustered_training/core/algorithms/gspo/app.py --env dev
8
+
9
+ uvx synth-ai eval --config /Users/joshpurtell/Documents/GitHub/synth-ai/examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml
10
+
11
+
12
+ uvx synth-ai train \
13
+ --type rl \
14
+ --config /Users/joshpurtell/Documents/GitHub/synth-ai/examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml \
15
+ --task-url https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run \
16
+ --backend https://synth-backend-dev-docker.onrender.com/api \
17
+ --env-file /Users/joshpurtell/Documents/GitHub/monorepo/environments/crafter/.env
18
+
19
+
20
+
21
+ ---
22
+
23
+ Verilog
24
+
25
+ # 1. Deploy Verilog task app
26
+ cd /Users/joshpurtell/Documents/GitHub/synth-ai && uvx synth-ai modal-serve grpo-verilog --name grpo-verilog-task-app --env-file /Users/joshpurtell/Documents/GitHub/monorepo/environments/verilog/.env
27
+
28
+ # 2. Baseline eval using Synth backend (pre-training)
29
+ uvx synth-ai eval --config /Users/joshpurtell/Documents/GitHub/synth-ai/examples/multi_step/configs/verilog_eval_synth_qwen8b.toml
30
+
31
+ # 3. (Optional) External reference eval using Groq Qwen 32B
32
+ uvx synth-ai eval --config /Users/joshpurtell/Documents/GitHub/synth-ai/examples/multi_step/configs/verilog_eval_groq_qwen32b.toml
33
+
34
+ # 4. Deploy training backend
35
+ cd /Users/joshpurtell/Documents/GitHub/monorepo && uv run modal deploy backend/app/routes/clustered_training/core/algorithms/gspo/app.py --env dev
36
+
37
+ # 5. Run RL training
38
+ uvx synth-ai train \
39
+ --type rl \
40
+ --config /Users/joshpurtell/Documents/GitHub/synth-ai/examples/multi_step/configs/verilog_rl_lora.toml \
41
+ --task-url https://synth-laboratories--grpo-verilog-task-app-fastapi-app-dev.modal.run \
42
+ --backend https://synth-backend-dev-docker.onrender.com/api \
43
+ --env-file /Users/joshpurtell/Documents/GitHub/monorepo/environments/verilog/.env
44
+
45
+ # 6. Post-training eval (update job_id in config first!)
46
+ # After training, note the job_id from logs (e.g., job_19a1823e56303de604f)
47
+ # Update verilog_eval_synth_trained_qwen8b.toml with your job_id
48
+ uvx synth-ai eval --config /Users/joshpurtell/Documents/GitHub/synth-ai/examples/multi_step/configs/verilog_eval_synth_trained_qwen8b.toml
@@ -0,0 +1,218 @@
1
+ # Verilog RL with LoRA Analysis
2
+
3
+ ## Executive Summary
4
+
5
+ **✅ YES, Verilog can absolutely do RL with LoRA just like Crafter!** The architecture is nearly identical, but there are important considerations around model size and task complexity.
6
+
7
+ ## Architecture Compatibility ✅
8
+
9
+ ### **Same Foundation** (No changes needed)
10
+ - ✅ **Contracts**: Uses identical `RolloutRequest`/`RolloutResponse` as Crafter
11
+ - ✅ **Task App Framework**: Same `synth_ai.task.apps` framework
12
+ - ✅ **Environment Pattern**: Same `StatefulEnvironment` + tool-based architecture
13
+ - ✅ **Rubrics System**: Same evaluation and reward system
14
+ - ✅ **Trace Correlation**: Already implemented in `rollout_executor` (line 817 in `grpo_verilog.py`)
15
+ - ✅ **Modal Deployment**: Same deployment pattern as Crafter
16
+
17
+ ### **Key Differences** (Considerations for LoRA)
18
+
19
+ #### 1. **Model Size: 8x Larger** ⚠️
20
+ ```toml
21
+ # Verilog (current)
22
+ model = "qwen/qwen3-32b" # 32B parameters
23
+
24
+ # Crafter (working)
25
+ model = "Qwen/Qwen3-4B" # 4B parameters
26
+ ```
27
+ **Impact**: Memory requirements 8x higher for LoRA training
28
+ **Solution**: Use gradient checkpointing, smaller batch sizes, or distributed training
29
+
30
+ #### 2. **Tool Set: Simpler but More Structured**
31
+ ```python
32
+ # Verilog Tools (4 tools)
33
+ TOOLS = ["write_file", "compile", "simulate", "submit"]
34
+
35
+ # Crafter Tools (20+ tools)
36
+ # craft, move, attack, gather, etc.
37
+ ```
38
+
39
+ **Verilog Advantages**:
40
+ - ✅ **Deterministic**: Write → Compile → Simulate → Submit workflow
41
+ - ✅ **Clear Success Criteria**: Tests pass = high reward
42
+ - ✅ **Sparse but Meaningful Rewards**: +10 for submit success, +1 for simulation pass
43
+
44
+ **Verilog Challenges**:
45
+ - ❌ **Sparser Rewards**: Fewer intermediate signals for learning
46
+ - ❌ **Longer Sequences**: Multi-step compilation chains
47
+ - ❌ **Error Recovery**: Must debug compilation failures
48
+
49
+ #### 3. **State Representation**
50
+ ```python
51
+ # Verilog State (file-based)
52
+ {
53
+ "files": {"TopModule.v": "module TopModule(..."},
54
+ "compile_status": "Last compile: Success",
55
+ "simulate_status": "Last simulation: Passed",
56
+ "task_completed": false
57
+ }
58
+
59
+ # Crafter State (world-based)
60
+ {
61
+ "inventory": {"wood": 5, "stone": 3},
62
+ "position": [x, y],
63
+ "nearby_entities": [...],
64
+ "achievement_unlocked": true
65
+ }
66
+ ```
67
+
68
+ ## Configuration for LoRA RL
69
+
70
+ ### **Option 1: Qwen3-0.6B (Recommended for testing)** ⭐
71
+ ```toml
72
+ [algorithm]
73
+ type = "online"
74
+ method = "policy_gradient"
75
+ variety = "gspo"
76
+
77
+ [model]
78
+ base = "Qwen/Qwen3-0.6B" # ✅ Same as existing SFT configs
79
+ trainer_mode = "lora"
80
+
81
+ [lora]
82
+ r = 16
83
+ alpha = 32
84
+ dropout = 0.05
85
+ target_modules = ["all-linear"]
86
+
87
+ [rollout]
88
+ env_name = "verilog"
89
+ max_turns = 15
90
+ policy_name = "verilog-designer"
91
+
92
+ [training]
93
+ batch_size = 4 # ✅ Same as Crafter
94
+ gradient_accumulation_steps = 1
95
+ ```
96
+
97
+ ### **Option 2: Qwen3-32B (Production)** ⚠️
98
+ ```toml
99
+ [algorithm]
100
+ type = "online"
101
+ method = "policy_gradient"
102
+ variety = "gspo"
103
+
104
+ [model]
105
+ base = "qwen/qwen3-32b" # ⚠️ 8x memory vs Crafter's 4B
106
+ trainer_mode = "lora"
107
+
108
+ [lora]
109
+ r = 16
110
+ alpha = 32
111
+ dropout = 0.05
112
+ target_modules = ["all-linear"]
113
+
114
+ [rollout]
115
+ env_name = "verilog"
116
+ max_turns = 15
117
+ policy_name = "verilog-designer"
118
+ ```
119
+
120
+ ### **Memory Optimization** (for 32B model)
121
+ ```toml
122
+ [vllm]
123
+ max_model_len = 4096 # Shorter than Crafter's 8192
124
+ tensor_parallel_size = 2 # Distribute across GPUs
125
+
126
+ [training]
127
+ batch_size = 2 # Smaller than Crafter's 4
128
+ gradient_accumulation_steps = 4
129
+ ```
130
+
131
+ ## Task App Changes Needed
132
+
133
+ ### **1. Mode Parameter Support** ✅ (Already implemented)
134
+ The Verilog task app already handles `mode="rl"` correctly:
135
+ ```python
136
+ # In grpo_verilog.py rollout_executor
137
+ policy_config = dict(policy_config_raw)
138
+ # ... mode parameter flows through naturally
139
+ ```
140
+
141
+ ### **2. Trace Correlation** ✅ (Already implemented)
142
+ ```python
143
+ # Line 817 in grpo_verilog.py
144
+ trajectory = RolloutTrajectory(
145
+ # ...
146
+ inference_url=agent.inference_url, # ✅ Required for trace correlation
147
+ decision_samples=None,
148
+ )
149
+ ```
150
+
151
+ ### **3. Rubric Integration** ✅ (Already configured)
152
+ ```python
153
+ # In grpo_verilog.py
154
+ rubrics=RubricBundle(
155
+ outcome=OUTCOME_RUBRIC, # Tests pass reward
156
+ events=EVENTS_RUBRIC, # Process efficiency reward
157
+ )
158
+ ```
159
+
160
+ ## RL Training Feasibility
161
+
162
+ ### **✅ Works Great**
163
+ 1. **Clear Success Signal**: Submit passing tests = +10 reward
164
+ 2. **Guided Process**: Natural write→compile→simulate→submit progression
165
+ 3. **Error Learning**: Agent must learn to debug compilation failures
166
+ 4. **Hardware Design**: Real-world applicable skills
167
+
168
+ ### **⚠️ Challenges**
169
+ 1. **Model Size**: 32B vs 4B = 8x memory, slower training
170
+ 2. **Sparse Rewards**: Fewer learning signals than Crafter's dense rewards
171
+ 3. **Longer Episodes**: 15+ steps vs Crafter's 10 steps
172
+ 4. **Compilation Errors**: Must learn to interpret and fix syntax errors
173
+
174
+ ## Recommended Approach
175
+
176
+ ### **Phase 1: Start with Qwen3-0.6B** ⭐ (as you requested)
177
+ ```toml
178
+ # Perfect for testing - same model used in existing SFT configs
179
+ model = "Qwen/Qwen3-0.6B"
180
+ batch_size = 4 # Same as Crafter
181
+ ```
182
+ - ✅ **Zero setup**: Already configured in `synth-ai/examples/sft/configs/crafter_lora_qwen0p6b.toml`
183
+ - ✅ **Fast iteration**: 0.6B parameters = quick training cycles
184
+ - ✅ **Memory efficient**: Fits on single GPU easily
185
+ - ✅ **Proven baseline**: Same model used in RL demos and SFT examples
186
+
187
+ ### **Phase 2: Scale to Qwen3-8B** (if 0.6B works well)
188
+ ```toml
189
+ model = "qwen/qwen3-8b"
190
+ batch_size = 2
191
+ gradient_accumulation_steps = 2
192
+ ```
193
+
194
+ ### **Phase 3: Production with Qwen3-32B**
195
+ ```toml
196
+ model = "qwen/qwen3-32b"
197
+ tensor_parallel_size = 2
198
+ batch_size = 1
199
+ gradient_accumulation_steps = 4
200
+ ```
201
+
202
+ ### **Phase 4: Optimize for Verilog Domain**
203
+ Consider fine-tuning the base model on:
204
+ - Verilog syntax and semantics
205
+ - Hardware design patterns
206
+ - Compilation error messages
207
+ - Testbench writing
208
+
209
+ ## Conclusion
210
+
211
+ **✅ Verilog RL with LoRA is absolutely feasible** and should work with the same pipeline as Crafter. The main differences are:
212
+
213
+ 1. **Larger model** (32B vs 4B) requires memory optimization
214
+ 2. **Sparser rewards** may need different reward shaping
215
+ 3. **More structured tasks** could actually make learning easier
216
+ 4. **Real hardware skills** make it more valuable than game tasks
217
+
218
+ **Recommended next step**: Create a `verilog_rl_lora.toml` config starting with Qwen3-0.6B (Phase 1 above), then scale to Qwen3-8B, and adapt the reward rubrics for the compilation workflow.
@@ -3,7 +3,7 @@
3
3
  [algorithm]
4
4
  type = "offline"
5
5
  method = "sft"
6
- variety = "fft"
6
+ variety = "lora"
7
7
 
8
8
  [job]
9
9
  model = "Qwen/Qwen3-Coder-30B-A3B-Instruct"
examples/sft/evaluate.py CHANGED
@@ -44,6 +44,7 @@ def _ops(n: int) -> list[str]:
44
44
 
45
45
 
46
46
  def _request(seed: int, a: EvalArgs) -> RolloutRequest:
47
+ from synth_ai.task.contracts import RolloutMode
47
48
  return RolloutRequest(
48
49
  run_id=f"eval-{seed}",
49
50
  env=RolloutEnvSpec(env_name="crafter", seed=seed, config={}),
@@ -53,6 +54,7 @@ def _request(seed: int, a: EvalArgs) -> RolloutRequest:
53
54
  ),
54
55
  ops=_ops(a.max_llm_calls),
55
56
  record=RolloutRecordConfig(trajectories=True, return_trace=False, trace_format="compact"),
57
+ mode=RolloutMode.EVAL,
56
58
  )
57
59
 
58
60
 
@@ -42,6 +42,7 @@ def _build_ops(max_llm_calls: int) -> list[str]:
42
42
 
43
43
 
44
44
  def _build_request(seed: int, run_id: str, model: str, inference_url: str, api_key: str, *, max_llm_calls: int, return_trace: bool) -> RolloutRequest:
45
+ from synth_ai.task.contracts import RolloutMode
45
46
  policy_cfg: dict[str, Any] = {
46
47
  "model": model,
47
48
  "inference_url": inference_url,
@@ -54,6 +55,7 @@ def _build_request(seed: int, run_id: str, model: str, inference_url: str, api_k
54
55
  policy=RolloutPolicySpec(policy_name="crafter-react", config=policy_cfg),
55
56
  ops=_build_ops(max_llm_calls),
56
57
  record=record,
58
+ mode=RolloutMode.EVAL,
57
59
  )
58
60
 
59
61
 
@@ -60,34 +60,55 @@ try:
60
60
  HAS_HOSTED = True
61
61
  except Exception:
62
62
  try: # pragma: no cover - optional dependency path
63
- from examples.warming_up_to_rl.task_app.synth_envs_hosted.branching import ( # type: ignore
64
- router as branching_router,
63
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.branching import ( # type: ignore
64
+ BranchingEnvironmentConfig,
65
65
  )
66
- from examples.warming_up_to_rl.task_app.synth_envs_hosted.environment_routes import ( # type: ignore # noqa: E501
67
- router as environment_router,
66
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.environment_routes import ( # type: ignore # noqa: E501
67
+ CrafterEnvironmentRoutes,
68
68
  )
69
- from examples.warming_up_to_rl.task_app.synth_envs_hosted.policy_routes import ( # type: ignore
70
- router as policy_router,
69
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.policy_routes import ( # type: ignore
70
+ PolicyRoutes,
71
71
  )
72
- from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import ( # type: ignore
72
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import ( # type: ignore
73
+ RolloutPayload,
74
+ )
75
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
76
+ EnvironmentConfig,
77
+ )
78
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
79
+ PolicyConfig,
80
+ )
81
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
82
+ RolloutRequest,
83
+ )
84
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
85
+ RolloutResponse,
86
+ )
87
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
88
+ RunSpec,
89
+ )
90
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
91
+ ToolUse,
92
+ )
93
+ from examples.task_apps.crafter.task_app.hosted.rollout import ( # type: ignore
73
94
  RolloutEnvSpec as LegacyRolloutEnvSpec,
74
95
  )
75
- from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
96
+ from examples.task_apps.crafter.task_app.hosted.rollout import (
76
97
  RolloutPolicySpec as LegacyRolloutPolicySpec,
77
98
  )
78
- from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
99
+ from examples.task_apps.crafter.task_app.hosted.rollout import (
79
100
  RolloutRecordConfig as LegacyRolloutRecordConfig,
80
101
  )
81
- from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
102
+ from examples.task_apps.crafter.task_app.hosted.rollout import (
82
103
  RolloutRequest as LegacyRolloutRequest,
83
104
  )
84
- from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
105
+ from examples.task_apps.crafter.task_app.hosted.rollout import (
85
106
  RolloutResponse as LegacyRolloutResponse,
86
107
  )
87
- from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
108
+ from examples.task_apps.crafter.task_app.hosted.rollout import (
88
109
  RolloutSafetyConfig as LegacyRolloutSafetyConfig,
89
110
  )
90
- from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
111
+ from examples.task_apps.crafter.task_app.hosted.rollout import (
91
112
  execute_rollout as legacy_execute_rollout,
92
113
  )
93
114
  HAS_HOSTED = True
@@ -264,7 +285,7 @@ def build_dataset() -> tuple[TaskDatasetRegistry, MiniSweDataset]:
264
285
  def _base_task_info(dataset: MiniSweDataset) -> TaskInfo:
265
286
  return TaskInfo(
266
287
  task={"id": "swe_mini", "name": "mini-SWE Tasks", "version": "0.1.0"},
267
- environments=["swe-mini"],
288
+ environment="swe-mini",
268
289
  action_space={
269
290
  "type": "tool",
270
291
  "tools": ["run_command", "submit_patch"],
@@ -292,11 +313,6 @@ def _base_task_info(dataset: MiniSweDataset) -> TaskInfo:
292
313
  },
293
314
  "tool": {"name": "run_command", "parallel_tool_calls": False},
294
315
  },
295
- capabilities={
296
- "supports_rollout": True,
297
- "supports_env_lifecycle": True,
298
- "requires_api_key_header": True,
299
- },
300
316
  limits={"max_ops": 2000, "max_time_s": 7200},
301
317
  )
302
318
 
@@ -348,18 +364,31 @@ def provide_task_instances(
348
364
  dataset: MiniSweDataset, base_info: TaskInfo, seeds: Sequence[int]
349
365
  ) -> Iterable[TaskInfo]:
350
366
  infos: list[TaskInfo] = []
367
+ base_observation = getattr(base_info, "observation", None)
368
+ if hasattr(base_observation, "model_dump"):
369
+ base_observation_data = base_observation.model_dump()
370
+ elif isinstance(base_observation, dict):
371
+ base_observation_data = dict(base_observation)
372
+ else:
373
+ base_observation_data = {}
374
+
351
375
  for seed in seeds:
352
376
  instance = dataset.sample_by_index(int(seed))
353
377
  infos.append(
354
378
  TaskInfo(
355
379
  task=base_info.task,
356
- environments=base_info.environments,
380
+ environment=base_info.environment,
357
381
  action_space=base_info.action_space,
358
- observation={**base_info.observation, "instance_id": instance["instance_id"]},
359
- dataset={**base_info.dataset, "instance_id": instance["instance_id"]},
382
+ observation={
383
+ **base_observation_data,
384
+ "instance_id": instance["instance_id"],
385
+ },
386
+ dataset={
387
+ **base_info.dataset.model_dump(),
388
+ "instance_id": instance["instance_id"],
389
+ },
360
390
  rubric=base_info.rubric,
361
391
  inference=base_info.inference,
362
- capabilities=base_info.capabilities,
363
392
  limits=base_info.limits,
364
393
  )
365
394
  )
@@ -397,10 +426,10 @@ def build_config() -> TaskAppConfig:
397
426
  HostedTaskAppCls = HostedTaskApp
398
427
  except Exception:
399
428
  try:
400
- from examples.warming_up_to_rl.task_app.synth_envs_hosted.hosted_app import ( # type: ignore
401
- TaskApp as HostedTaskApp,
429
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.hosted_app import ( # type: ignore
430
+ create_app,
402
431
  )
403
- HostedTaskAppCls = HostedTaskApp
432
+ HostedTaskAppCls = create_app
404
433
  except Exception as exc: # pragma: no cover - optional dependency path
405
434
  logger.warning("Unable to import HostedTaskApp for swe-mini: %s", exc)
406
435
  if HostedTaskAppCls is not None:
@@ -455,6 +484,7 @@ def build_config() -> TaskAppConfig:
455
484
 
456
485
  legacy_request = LegacyRolloutRequest(
457
486
  run_id=request.run_id,
487
+ mode=request.mode, # Preserve mode for nested requests
458
488
  env=LegacyRolloutEnvSpec(
459
489
  env_id=request.env.env_id,
460
490
  env_name=env_spec.env_name or "swe-mini",
@@ -12,6 +12,7 @@ from fastapi import APIRouter, HTTPException, Request, status
12
12
  from pydantic import BaseModel
13
13
  from synth_ai.lm.vendors.base import BaseLMResponse
14
14
  from synth_ai.task.tracing_utils import unique_sft_path
15
+ from synth_ai.task.contracts import RolloutMode
15
16
  from synth_ai.tracing_v3.abstractions import EnvironmentEvent, LMCAISEvent, TimeRecord
16
17
  from synth_ai.tracing_v3.llm_call_record_helpers import create_llm_call_record_from_response
17
18
  from synth_ai.tracing_v3.session_tracer import SessionTracer
@@ -120,6 +121,7 @@ class RolloutRequest(BaseModel):
120
121
  # Optional run/session context
121
122
  training_session_id: str | None = None
122
123
  synth_base_url: str | None = None
124
+ mode: RolloutMode # Required: explicit RL vs EVAL mode
123
125
 
124
126
 
125
127
  class RolloutStep(BaseModel):
@@ -1238,6 +1240,15 @@ async def execute_rollout(
1238
1240
  )
1239
1241
 
1240
1242
  # Build partial trajectory and return HTTP 200
1243
+ # Extract inference_url from policy meta (best effort)
1244
+ inference_url = None
1245
+ if policy_handle is not None:
1246
+ try:
1247
+ policy_snapshot = policy_handle.snapshot()
1248
+ inference_url = policy_snapshot.get("config", {}).get("inference_url")
1249
+ except Exception:
1250
+ pass
1251
+
1241
1252
  trajectory = RolloutTrajectory(
1242
1253
  env_id=env_id,
1243
1254
  policy_id=policy_id,
@@ -1249,6 +1260,7 @@ async def execute_rollout(
1249
1260
  "at_op": op,
1250
1261
  },
1251
1262
  length=len(trajectory_steps),
1263
+ inference_url=inference_url, # NEW: Required for trace correlation
1252
1264
  decision_samples=decision_samples if step_rewards_active else None,
1253
1265
  )
1254
1266
  metrics = RolloutMetrics(
@@ -1369,6 +1381,15 @@ async def execute_rollout(
1369
1381
  },
1370
1382
  )
1371
1383
  trajectory_steps.append(term_step)
1384
+ # Extract inference_url from policy meta (best effort)
1385
+ inference_url = None
1386
+ if policy_handle is not None:
1387
+ try:
1388
+ policy_snapshot = policy_handle.snapshot()
1389
+ inference_url = policy_snapshot.get("config", {}).get("inference_url")
1390
+ except Exception:
1391
+ pass
1392
+
1372
1393
  trajectory = RolloutTrajectory(
1373
1394
  env_id=env_id,
1374
1395
  policy_id=policy_id,
@@ -1379,6 +1400,7 @@ async def execute_rollout(
1379
1400
  "at_op": op,
1380
1401
  },
1381
1402
  length=len(trajectory_steps),
1403
+ inference_url=inference_url, # NEW: Required for trace correlation
1382
1404
  decision_samples=decision_samples if step_rewards_active else None,
1383
1405
  )
1384
1406
  metrics = RolloutMetrics(
@@ -1460,6 +1482,15 @@ async def execute_rollout(
1460
1482
  )
1461
1483
  trajectory_steps.append(term_step)
1462
1484
  # Build partial response
1485
+ # Extract inference_url from policy meta (best effort)
1486
+ inference_url = None
1487
+ if policy_handle is not None:
1488
+ try:
1489
+ policy_snapshot = policy_handle.snapshot()
1490
+ inference_url = policy_snapshot.get("config", {}).get("inference_url")
1491
+ except Exception:
1492
+ pass
1493
+
1463
1494
  trajectory = RolloutTrajectory(
1464
1495
  env_id=env_id,
1465
1496
  policy_id=policy_id,
@@ -1471,6 +1502,7 @@ async def execute_rollout(
1471
1502
  "at_op": op,
1472
1503
  },
1473
1504
  length=len(trajectory_steps),
1505
+ inference_url=inference_url, # NEW: Required for trace correlation
1474
1506
  decision_samples=decision_samples if step_rewards_active else None,
1475
1507
  )
1476
1508
  metrics = RolloutMetrics(
@@ -1688,12 +1720,22 @@ async def execute_rollout(
1688
1720
  timing_final.setdefault("overhead_ms", 0.0)
1689
1721
 
1690
1722
  # Build trajectory
1723
+ # Extract inference_url from policy meta
1724
+ inference_url = None
1725
+ if policy_handle is not None:
1726
+ try:
1727
+ policy_snapshot = policy_handle.snapshot()
1728
+ inference_url = policy_snapshot.get("config", {}).get("inference_url")
1729
+ except Exception:
1730
+ pass
1731
+
1691
1732
  trajectory = RolloutTrajectory(
1692
1733
  env_id=env_id,
1693
1734
  policy_id=policy_id,
1694
1735
  steps=trajectory_steps,
1695
1736
  final={"observation": _summarize_observation_for_storage(env_handle, current_obs)},
1696
1737
  length=len(trajectory_steps),
1738
+ inference_url=inference_url, # NEW: Required for trace correlation
1697
1739
  decision_samples=decision_samples if step_rewards_active else None,
1698
1740
  )
1699
1741
 
@@ -1,15 +1,14 @@
1
1
  #!/usr/bin/env python3
2
- """
3
- Simple test script for the GRPO Synth Envs Hosted Service.
4
-
5
- Run this after starting the service with:
6
- python main.py
7
- """
2
+ """Manual smoke script for the GRPO Synth Envs Hosted Service."""
8
3
 
9
4
  import asyncio
10
5
  import json
11
6
 
12
7
  import httpx
8
+ import pytest
9
+
10
+
11
+ pytestmark = pytest.mark.skip(reason="Requires running hosted service on localhost:8000")
13
12
 
14
13
 
15
14
  async def test_service():