synth-ai 0.2.12__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (229) hide show
  1. examples/multi_step/configs/crafter_rl_outcome.toml +74 -0
  2. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +186 -0
  3. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +83 -0
  4. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +78 -0
  5. examples/multi_step/crafter_rl_lora.md +51 -10
  6. examples/multi_step/sse_metrics_streaming_notes.md +357 -0
  7. examples/multi_step/task_app_config_notes.md +7 -1
  8. examples/swe/task_app/grpo_swe_mini.py +55 -26
  9. examples/swe/task_app/hosted/rollout.py +40 -0
  10. examples/swe/task_app/hosted/test_service.py +5 -6
  11. examples/task_apps/TESTING.md +275 -0
  12. examples/task_apps/__init__.py +0 -0
  13. examples/task_apps/crafter/__init__.py +0 -0
  14. examples/task_apps/crafter/task_app/__init__.py +2 -0
  15. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +21 -46
  16. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
  17. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +60 -4
  18. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +109 -45
  19. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +67 -49
  20. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +242 -193
  21. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
  22. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  23. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  24. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  25. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  26. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  27. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  28. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  29. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  30. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  31. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  32. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  33. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  34. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  35. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  36. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  37. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  38. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  39. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  40. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  41. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  42. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  43. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  44. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  45. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  46. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  47. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  48. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  49. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  50. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  51. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  52. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  53. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  54. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  55. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  56. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  57. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  58. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  59. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  60. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  61. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  62. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  63. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  64. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  65. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  66. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  67. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  68. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  69. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  70. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  71. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  72. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  73. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  74. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  75. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  76. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  77. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  78. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  79. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  80. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  81. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  82. examples/task_apps/enron/__init__.py +1 -0
  83. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  84. examples/task_apps/enron/task_app/README.md +14 -0
  85. examples/task_apps/enron/task_app/__init__.py +1 -0
  86. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  87. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  88. examples/task_apps/enron/tests/__init__.py +2 -0
  89. examples/task_apps/enron/tests/conftest.py +115 -0
  90. examples/task_apps/enron/tests/integration/__init__.py +2 -0
  91. examples/task_apps/enron/tests/integration/test_enron_eval.py +177 -0
  92. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  93. examples/task_apps/enron/tests/unit/__init__.py +2 -0
  94. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  95. examples/task_apps/math/__init__.py +0 -0
  96. examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
  97. examples/task_apps/pokemon_battle/__init__.py +2 -0
  98. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  99. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  100. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  101. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  102. examples/task_apps/pokemon_red/README.md +357 -0
  103. examples/task_apps/pokemon_red/__init__.py +3 -0
  104. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
  105. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +73 -0
  106. examples/task_apps/pokemon_red/task_app.py +606 -0
  107. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +191 -0
  108. examples/task_apps/sokoban/README.md +307 -0
  109. examples/task_apps/sokoban/__init__.py +3 -0
  110. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  111. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  112. examples/task_apps/sokoban/task_app.py +1058 -0
  113. examples/task_apps/sokoban/tests/__init__.py +2 -0
  114. examples/task_apps/sokoban/tests/conftest.py +113 -0
  115. examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
  116. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  117. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  118. examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
  119. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  120. examples/task_apps/verilog/__init__.py +1 -0
  121. examples/task_apps/verilog/eval_groq_qwen32b.toml +20 -0
  122. examples/task_apps/verilog/task_app/README.md +12 -0
  123. examples/task_apps/verilog/task_app/__init__.py +1 -0
  124. examples/task_apps/verilog/task_app/grpo_verilog.py +931 -0
  125. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  126. examples/task_apps/verilog/tests/__init__.py +2 -0
  127. examples/task_apps/verilog/tests/conftest.py +115 -0
  128. examples/task_apps/verilog/tests/integration/__init__.py +2 -0
  129. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +179 -0
  130. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  131. examples/task_apps/verilog/tests/unit/__init__.py +2 -0
  132. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  133. examples/vlm/crafter_openai_vlm_agent.py +4 -4
  134. examples/vlm/run_crafter_vlm_benchmark.py +4 -4
  135. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +4 -2
  136. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +4 -2
  137. examples/warming_up_to_rl/run_eval.py +127 -18
  138. examples/workflows/__init__.py +0 -0
  139. examples/workflows/math_rl/__init__.py +0 -0
  140. examples/workflows/math_rl/download_dataset.py +80 -0
  141. synth_ai/__init__.py +41 -1
  142. synth_ai/api/train/builders.py +73 -29
  143. synth_ai/api/train/cli.py +12 -6
  144. synth_ai/api/train/configs/__init__.py +44 -0
  145. synth_ai/api/train/configs/rl.py +134 -0
  146. synth_ai/api/train/configs/sft.py +95 -0
  147. synth_ai/api/train/configs/shared.py +24 -0
  148. synth_ai/api/train/env_resolver.py +5 -2
  149. synth_ai/api/train/supported_algos.py +10 -5
  150. synth_ai/api/train/utils.py +7 -4
  151. synth_ai/cli/__init__.py +7 -51
  152. synth_ai/cli/_storage.py +4 -3
  153. synth_ai/cli/_validate_task_app.py +11 -0
  154. synth_ai/cli/balance.py +4 -3
  155. synth_ai/cli/calc.py +2 -2
  156. synth_ai/cli/demo.py +49 -43
  157. synth_ai/cli/legacy_root_backup.py +1 -1
  158. synth_ai/cli/rl_demo.py +86 -106
  159. synth_ai/cli/root.py +0 -97
  160. synth_ai/cli/task_apps.py +1710 -186
  161. synth_ai/demos/core/cli.py +121 -159
  162. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +28 -16
  163. synth_ai/environments/examples/crafter_classic/environment.py +16 -0
  164. synth_ai/environments/examples/enron/engine.py +7 -2
  165. synth_ai/environments/examples/enron/environment.py +68 -0
  166. synth_ai/environments/examples/red/engine.py +27 -0
  167. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  168. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  169. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  170. synth_ai/environments/examples/red/environment.py +60 -0
  171. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  172. synth_ai/environments/examples/verilog/engine.py +30 -4
  173. synth_ai/evals/__init__.py +15 -0
  174. synth_ai/evals/client.py +82 -0
  175. synth_ai/evals/types.py +42 -0
  176. synth_ai/jobs/client.py +16 -4
  177. synth_ai/judge_schemas.py +127 -0
  178. synth_ai/py.typed +0 -0
  179. synth_ai/task/__init__.py +14 -5
  180. synth_ai/task/contracts.py +124 -38
  181. synth_ai/task/proxy.py +48 -56
  182. synth_ai/task/rubrics/__init__.py +53 -0
  183. synth_ai/task/rubrics/loaders.py +133 -0
  184. synth_ai/task/rubrics/models.py +57 -0
  185. synth_ai/task/rubrics/scoring.py +113 -0
  186. synth_ai/task/rubrics/strict.py +149 -0
  187. synth_ai/task/server.py +8 -7
  188. synth_ai/task/validators.py +269 -6
  189. synth_ai/tracing_v3/decorators.py +7 -3
  190. synth_ai/tracing_v3/replica_sync.py +4 -4
  191. synth_ai/tracing_v3/serialization.py +130 -0
  192. synth_ai/tracing_v3/trace_utils.py +317 -0
  193. synth_ai/tracing_v3/turso/native_manager.py +3 -3
  194. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/METADATA +4 -1
  195. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/RECORD +228 -89
  196. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/entry_points.txt +0 -1
  197. synth_ai/task/rubrics.py +0 -219
  198. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
  199. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
  200. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
  201. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
  202. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
  203. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
  204. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
  205. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
  206. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -0
  207. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -0
  208. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
  209. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
  210. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
  211. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
  212. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
  213. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
  214. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
  215. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
  216. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
  217. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/utils.py +0 -0
  218. /examples/{rl/task_app → task_apps/math}/README.md +0 -0
  219. /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
  220. /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
  221. /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
  222. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
  223. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
  224. /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
  225. /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
  226. /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
  227. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/WHEEL +0 -0
  228. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/licenses/LICENSE +0 -0
  229. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,357 @@
1
+ # SSE metrics/event streaming design (RL + FT)
2
+
3
+ ## Goals
4
+ - Near real-time push of job status, metrics, and logs during RL training, evaluation, and fine-tuning (FT)
5
+ - Single streaming endpoint per job, resumable (Last-Event-ID), low overhead, widely compatible (HTTP/1.1)
6
+ - Minimal client friction (CLI + Python helper), production-ready (auth, backpressure, rate limit)
7
+
8
+ ## Non-goals
9
+ - Binary/frame multiplexing (use WebSocket if needed later)
10
+ - Arbitrary high-frequency payloads (we will coalesce/limit ~2–4 Hz for metrics)
11
+
12
+ ---
13
+
14
+ ## Endpoint
15
+ - Method: GET `/rl/jobs/{job_id}/stream`
16
+ - Headers:
17
+ - Auth: `X-API-Key: <env key>` or `Authorization: Bearer <token>`
18
+ - Cache: `Cache-Control: no-cache`
19
+ - Response:
20
+ - Content-Type: `text/event-stream; charset=utf-8`
21
+ - Transfer-Encoding: `chunked`
22
+ - Connection: `keep-alive`
23
+ - Query params (optional):
24
+ - `since_id`: int; resume from a specific event id (inclusive)
25
+ - `types`: comma list `metric,status,log,artifact` (default: all)
26
+ - `heartbeat`: seconds between heartbeats (default 20)
27
+ - `split`: `train|eval` (filters metrics only)
28
+
29
+ ## Event framing (SSE)
30
+ - Fields per message:
31
+ - `id: <int>` monotonically increasing per job
32
+ - `event: <status|metric|log|artifact|heartbeat>`
33
+ - `data: <JSON>` single-line JSON (compact)
34
+ - Heartbeats: comment lines `: keep-alive` at configured interval
35
+ - Flush: after each event write + heartbeat
36
+ - Backpressure: if the producer outpaces the consumer, coalesce metrics, always keep status/log/artifact events, and never buffer without bound
37
+
38
+ ## Payload schemas
39
+ - `status`
40
+ - `{ "state": "queued|running|succeeded|failed|canceled", "step": 123, "epoch": 3, "phase": "train|eval|ft", "message": "...", "ts": 173.12 }`
41
+ - `metric`
42
+ - `{ "name": "avg_reward|loss|accuracy|success_rate|return", "value": 0.123, "step": 123, "epoch": 3, "split": "train|eval", "window": 100, "mean": 0.42, "std": 0.08, "ts": 173.12 }`
43
+ - Optional extras: `{ "tags": {"env": "crafter", "policy": "react"} }`
44
+ - `log`
45
+ - `{ "level": "INFO|WARN|ERROR", "message": "...", "ts": 173.12 }`
46
+ - `artifact`
47
+ - `{ "kind": "checkpoint|trace|plot|jsonl", "url": "/rl/jobs/{id}/artifacts/ckpt_0003.pt", "step": 123, "ts": 173.12 }`
48
+ - `heartbeat`
49
+ - `{ "alive": true, "ts": 173.12 }`
50
+
51
+ ### Example stream (illustrative)
52
+ ```
53
+ id: 101
54
+ event: status
55
+ data: {"state":"running","phase":"train","step":820,"epoch":4,"ts":173.12}
56
+
57
+ id: 102
58
+ event: metric
59
+ data: {"name":"avg_reward","value":0.62,"step":820,"epoch":4,"split":"train","ts":173.13}
60
+
61
+ id: 103
62
+ event: metric
63
+ data: {"name":"loss","value":1.84,"step":820,"epoch":4,"split":"train","window":100,"mean":1.90,"std":0.15,"ts":173.13}
64
+
65
+ id: 104
66
+ event: log
67
+ data: {"level":"INFO","message":"checkpoint saved","ts":173.16}
68
+
69
+ id: 105
70
+ event: artifact
71
+ data: {"kind":"checkpoint","url":"/rl/jobs/j_abc/artifacts/ckpt_0004.pt","step":820,"ts":173.16}
72
+
73
+ : keep-alive
74
+ ```
75
+
76
+ ---
77
+
78
+ ## Server architecture
79
+
80
+ ### Components
81
+ - Event bus per `job_id` (async queue) where producers (RL, FT, evaluator) `emit(Event)`
82
+ - Ring buffer per job for replay (configurable: last N events OR last T minutes)
83
+ - SSE handler:
84
+ 1) Authenticate, pick job, determine resume cursor (`since_id` or `Last-Event-ID`)
85
+ 2) Replay from ring buffer >= cursor
86
+ 3) Attach to live queue; stream new events
87
+ 4) Emit heartbeats; close after terminal `status` + grace
88
+
89
+ ### Concurrency & ordering
90
+ - Single writer increments `event_id`
91
+ - Replay preserves original order; live continues from last id
92
+ - If consumer slow: drop/coalesce metrics (preserve last per metric name), always deliver status/log/artifact
93
+
94
+ ### Rate limiting & coalescing
95
+ - Default target 2–4 Hz for metrics per split
96
+ - Coalesce by metric name within a small interval (e.g., 250–500 ms)
97
+ - Status events limited to phase changes or every 5–10s
98
+
99
+ ### Auth & security
100
+ - Accept `X-API-Key` or `Authorization: Bearer`
101
+ - Validate job ownership/visibility
102
+ - CORS: allow EventSource; set `Access-Control-Allow-Origin` appropriately
103
+ - Timeouts: server idle timeout > heartbeat * 2; client reconnect on drop
104
+
105
+ ### Config knobs (env)
106
+ - `SSE_HEARTBEAT_SECS` (default 20)
107
+ - `SSE_RING_BUFFER_EVENTS` (e.g., 2000) OR `SSE_RING_BUFFER_WINDOW_SECS` (e.g., 600)
108
+ - `SSE_MAX_METRIC_HZ` (e.g., 4)
109
+ - `SSE_MAX_CLIENTS_PER_JOB` (protect from fan-out)
110
+
111
+ ---
112
+
113
+ ## Emit points
114
+
115
+ ### RL training/eval
116
+ - On train step end: `metric` avg_reward/return/success_rate; `status` every N steps
117
+ - On eval step end: `metric` eval_return/success_rate; `artifact` eval JSONL optional
118
+ - On checkpoint: `artifact` + `log`
119
+ - On phase transitions: `status` (train→eval, etc.)
120
+
121
+ ### Fine-tuning (FT)
122
+ - On optimizer step: `metric` loss (and optional lr)
123
+ - On validation: `metric` val_loss/accuracy; optional `artifact` (curves)
124
+ - On checkpoint: `artifact` + `log`
125
+
126
+ ---
127
+
128
+ ## Client (synth-ai)
129
+
130
+ ### CLI
131
+ - `synth-ai jobs stream <job_id> [--jsonl out.jsonl] [--types metric,status]`
132
+ - Prints compact lines: `t=18:22:40 step=820 avg_reward=0.62 loss=1.84`
133
+ - Writes raw events to JSONL if specified
134
+
135
+ ### Python helper
136
+ ```python
137
+ from synth_ai.client import TaskAppClient
138
+
139
+ with TaskAppClient(base_url, api_key) as c:
140
+ for ev in c.stream_job(job_id, types=["metric","status"], since_id=None):
141
+ handle(ev)
142
+ ```
143
+ - Handles reconnect with `Last-Event-ID`
144
+ - Dedupes by event id; optional local aggregation windows
145
+
146
+ ---
147
+
148
+ ## Error handling & edge cases
149
+ - 404 (job not found): immediate error, no stream
150
+ - 401/403: immediate error, no stream
151
+ - Producer stalls: heartbeats continue; client shows "connected: no new data"
152
+ - Ring buffer miss (client too far behind): send a summary `status` snapshot, then live only
153
+ - Buffer overflow: set `dropped_events: true` field in a `status` update
154
+
155
+ ---
156
+
157
+ ## Testing plan
158
+ - Unit: SSE formatting, ring buffer indexing, resume semantics, coalescing
159
+ - Integration: run toy RL + FT loops; assert ordered sequence and types
160
+ - Load: simulate 100 Hz metric emissions; verify rate limiting and non-blocking behavior
161
+ - Auth: invalid key, missing key, cross-job access rejection
162
+
163
+ ---
164
+
165
+ ## Rollout plan
166
+ 1) Implement event bus + ring buffer per job
167
+ 2) Add SSE route with auth, resume, heartbeat
168
+ 3) Emitters in RL/FT loops (train/eval steps, loss/reward, checkpoints)
169
+ 4) synth-ai client: CLI + Python helper; optional JSONL sink
170
+ 5) Docs and example snippets (curl, Python, browser EventSource)
171
+
172
+ ---
173
+
174
+ ## Open questions / decisions
175
+ - Do we persist events beyond ring buffer? (optional artifact JSONL)
176
+ - Multi-tenant limits per org/user? (SSE_MAX_CLIENTS_PER_ORG?)
177
+ - Artifact URLs: signed vs. authenticated direct
178
+ - Backward compatibility with current polling endpoints
179
+ I'll scan the backend to find where RL jobs are created, status is served, and whether there's any existing streaming/event infrastructure to extend. Then I'll identify exact files/functions to change for adding an SSE endpoint and metric emitters.
180
+
181
+ [2 tools called]
182
+
183
+ Here’s the exact backend surface you’ll need to touch to add a single SSE stream for status + metrics, with where to hook metric emitters.
184
+
185
+ ### Where jobs and events live
186
+ - Job and event tables (legacy RL path):
187
+ ```1:20:monorepo/backend/app/db/models/jobs.py
188
+ """Job orchestration models (RL/SFT shared).
189
+ """
190
+ ```
191
+ ```31:58:monorepo/backend/app/db/models/jobs.py
192
+ class Job(Base):
193
+ __tablename__ = "jobs"
194
+ job_id = Column(String, primary_key=True)
195
+ ...
196
+ status = Column(String, nullable=False, default="QUEUED")
197
+ ...
198
+ last_seq = Column(BigInteger, nullable=True, default=0)
199
+ events = relationship("JobEvent", back_populates="job", cascade="all, delete-orphan", passive_deletes=True)
200
+ ```
201
+ ```66:88:monorepo/backend/app/db/models/jobs.py
202
+ class JobEvent(Base):
203
+ """Append-only job event log."""
204
+ __tablename__ = "job_events"
205
+ job_id = Column(String, ForeignKey("jobs.job_id", ondelete="CASCADE"), primary_key=True)
206
+ seq = Column(BigInteger, primary_key=True)
207
+ ts = Column(DateTime(timezone=True), server_default=func.now())
208
+ type = Column(String, nullable=False)
209
+ level = Column(String, nullable=False, default="info")
210
+ message = Column(Text, nullable=False)
211
+ data = Column(JSONB, nullable=True)
212
+ ```
213
+
214
+ - DB repo (atomic seq increment + append):
215
+ ```108:146:monorepo/backend/app/orchestration/jobs/repository_db.py
216
+ async def append_event(...):
217
+ res = await self.session.execute(
218
+ update(LearningJob)
219
+ .where(LearningJob.job_id == job_id)
220
+ .values(last_seq=func.coalesce(LearningJob.last_seq, 0) + 1, updated_at=func.now())
221
+ .returning(LearningJob.last_seq)
222
+ )
223
+ seq = res.scalar_one()
224
+ ev = JobEvent(job_id=job_id, seq=seq, type=type_, level=level, message=message, data=(data or {}))
225
+ ...
226
+ ```
227
+
228
+ - RL jobs REST (create + emit first events via PostgREST emitter):
229
+ ```214:233:monorepo/backend/app/routes/clustered_training/core/routes.py
230
+ @router.post("/jobs", ...)
231
+ async def create_job(...):
232
+ ...
233
+ ```
234
+ ```784:799:monorepo/backend/app/routes/clustered_training/core/routes.py
235
+ await get_postgrest_emitter().append_event(job_id=job_id, type_="rl.job.created", message="RL job created", data={"work": payload.work})
236
+ ```
237
+
238
+ - Shared storage path showing how “update” and “append event” are broadcast via the PostgREST emitter:
239
+ ```242:271:monorepo/backend/app/routes/simple_training/services/job_service.py
240
+ async def update_job(...): ...
241
+ async def append_job_event(...): ...
242
+ ```
243
+
244
+ What to add/change (SSE + emitters)
245
+
246
+ 1) Add SSE endpoint under RL jobs
247
+ - File: `monorepo/backend/app/routes/clustered_training/core/routes.py`
248
+ - New route: `GET /api/rl/jobs/{job_id}/stream`
249
+ - Auth: `ValidatedAPIKey`
250
+ - Headers: honor `Last-Event-ID`; Query: `since_id`, `types`, `heartbeat`
251
+ - Flow:
252
+ - Validate job exists (use the DB-backed repo in production mode)
253
+ - Determine start seq (from Last-Event-ID or since_id, else 0)
254
+ - Replay: call repo `list_events(job_id, after=seq)` and stream as SSE (`id`, `event`, `data`)
255
+ - Live tail: loop with short sleep (e.g., 0.5–1.0s) fetching new events by `after=last_seq`
256
+ - Heartbeats as comments `: keep-alive` every N seconds
257
+ - Event mapping:
258
+ - `JobEvent.type` prefixes map to SSE `event`:
259
+ - `rl.job.*`, `job.updated` → `status`
260
+ - `rl.step.metric`, `ft.step.metric`, `eval.metric` → `metric`
261
+ - `system.log.*` → `log`
262
+ - `artifact.*` → `artifact`
263
+ - `Job.status` can be snapshotted once at connect (send a `status`)
264
+
265
+ 2) Ensure a consistent event source for reads
266
+ - Prefer the DB repo (`JobsRepositoryDB`) in prod mode. If the current code path uses the PostgREST emitter for appends, verify that the repo’s `list_events` reads from the same canonical table (it does for `JobEvent`). If your RL path uses the “learning_shared” models instead, use the associated repository there (same pattern: list by job_id + seq).
267
+ - If you must keep PostgREST for append-only, that’s fine; SSE can still read the DB rows inserted alongside (your outbox/emitter already supports both).
268
+
269
+ 3) Emitters in training/FT loops
270
+ - File(s): `monorepo/backend/app/orchestration/hatchet/workflows.py` (RL workflow nodes), any FT job loops
271
+ - After each meaningful step:
272
+ - Train: append `type="rl.step.metric"`, `data={"avg_reward":..., "return":..., "success_rate":..., "step":..., "epoch":..., "split":"train"}`.
273
+ - Eval: `type="eval.metric"` with eval metrics and split.
274
+ - FT: `type="ft.step.metric"`, `data={"loss":..., "lr":..., "step":..., "epoch":..., "split":"train"}`; validation as `split="eval"`.
275
+ - On phase changes/checkpoints: `type="job.updated"` or `artifact.checkpoint` with URLs.
276
+ - Use the same helper used elsewhere:
277
+ ```236:276:monorepo/backend/app/routes/simple_training/services/job_service.py
278
+ async def append_job_event(...): return await get_postgrest_emitter().append_event(...)
279
+ ```
280
+
281
+ 4) Optional shared service abstraction
282
+ - File: `monorepo/backend/app/routes/simple_training/services/storage_shared.py`
283
+ - Add a small `stream_job_events(job_id, after)` helper that wraps `repo.list_events(...)` and normalizes schemas (legacy vs learning_shared). The SSE route can call this.
284
+
285
+ 5) Wire the router
286
+ - File: `monorepo/backend/app/routes/main.py`
287
+ - Include the new GET route (under the RL router you already mount).
288
+ - CORS: ensure EventSource allowed if frontend will use browser SSE.
289
+
290
+ Indexes/Perf you already have
291
+ - Jobs/events tables include per-job indexes and a `last_seq`; reads by `(job_id, seq)` are efficient:
292
+ ```84:88:monorepo/backend/app/db/models/jobs.py
293
+ Index("idx_job_events_job_ts", "job_id", "ts"),
294
+ Index("idx_job_events_data_gin", "data", postgresql_using="gin"),
295
+ ```
296
+ - For learning_shared events (if used), there are sequence indexes too:
297
+ ```159:165:monorepo/backend/app/db/models/learning_shared.py
298
+ sa.Index("idx_learning_job_events_job_seq_idx", "job_id", "seq"),
299
+ ```
300
+
301
+ Summary of minimal backend edits
302
+ - Add SSE route:
303
+ - `monorepo/backend/app/routes/clustered_training/core/routes.py` (GET `/api/rl/jobs/{job_id}/stream`)
304
+ - Consume events via repo:
305
+ - `monorepo/backend/app/orchestration/jobs/repository_db.py` (use `list_events`)
306
+ - Emit metrics from loops:
307
+ - `monorepo/backend/app/orchestration/hatchet/workflows.py` (append_event at train/eval steps)
308
+ - Optional shared helper:
309
+ - `monorepo/backend/app/routes/simple_training/services/storage_shared.py` (normalize event reads)
310
+ - Wireup:
311
+ - `monorepo/backend/app/routes/main.py` (include SSE route)
312
+ - No schema changes required; you already have `JobEvent`/indexes and event append plumbing.
313
+
314
+ ### SDK additions for great terminal streaming (SSE with polling fallback)
315
+
316
+ - RlJobsApi extensions (synth_ai/jobs/client.py)
317
+ - stream(job_id, since_id=None, types=None, heartbeat=None) -> async iterator of events (uses SSE; falls back to polling)
318
+ - events(job_id, after=None, limit=500) -> list[JobEvent] (poll)
319
+ - status(job_id) -> JobSummary (single snapshot)
320
+
321
+ - Event models (synth_ai/jobs/types.py)
322
+ - JobEvent base: {id, type, level, message, data, ts}
323
+ - StatusEvent, MetricEvent, LogEvent, ArtifactEvent (typed helpers)
324
+
325
+ - JobsWatcher helper (synth_ai/jobs/watcher.py)
326
+ - constructor(client, job_id, interval=2.0, prefer_sse=True, jsonl_path=None, types=None)
327
+ - run(on_event, stop_when=None) → handles SSE connect/reconnect, polling fallback, Last-Event-ID cursor, dedupe
328
+ - metrics_tracker: rolling windows per metric name (mean/std/min/max, last_value, last_step)
329
+ - backoff policy: jittered reconnect; rate limiter for render
330
+
331
+ - Terminal renderer (synth_ai/jobs/render.py)
332
+ - RichRenderer (or minimal TTY): compact line updates: t=HH:MM:SS | step/E | key metrics (avg_reward, loss, val_loss, success_rate)
333
+ - modes: one-line ticker vs. per-event lines; quiet mode; color by level/state
334
+ - JSONL sink: raw event writes without printing prompts/payloads
335
+
336
+ - CLI command (synth_ai/api/train/cli.py)
337
+ - synth-ai jobs watch <job_id> [--types metric,status] [--interval 2] [--jsonl out.jsonl] [--since-id N] [--no-sse]
338
+ - exit codes: 0 on succeeded, 1 on failed/canceled, 2 on timeout
339
+
340
+ - Utilities (synth_ai/jobs/utils.py)
341
+ - BackoffPolicy(retry, max) with jitter
342
+ - EventCursor(last_id, update)
343
+ - MetricsFormatter(map by job_type: RL vs FT metric labels)
344
+ - Coalescer: compress frequent metrics to ≤4 Hz
345
+
346
+ - Defaults/behavior
347
+ - Prefer SSE; if the stream returns 404/405 or the connection closes → fall back to polling events() every interval
348
+ - Heartbeat support; show “connected/no data” when only heartbeats
349
+ - Resume: honor --since-id or Last-Event-ID; persist cursor optionally
350
+
351
+ - Minimal backend assumptions
352
+ - GET /api/rl/jobs/{job_id}/stream (SSE) or /api/rl/jobs/{job_id}/events?after=… (poll)
353
+ - Events include metric/status/log/artifact with seq ids and ts
354
+
355
+ - Extensibility
356
+ - Plugin renderers per job_type (rl, sft/ft)
357
+ - Hooks: on_status_change, on_metric(name, value), on_artifact(url)
@@ -485,4 +485,10 @@ payload = {
485
485
  }
486
486
  ```
487
487
 
488
- Status: Notes committed on branch `friday-cleanup` and pushed.
488
+ Status: Notes committed on branch `friday-cleanup` and pushed.
489
+
490
+ ### Operational guardrails
491
+
492
+ - Treat avg_turns == 0 (or a high fraction of episodes with turns == 0) as a failure condition; exit non‑zero.
493
+ - Fail fast when the first policy step returns a 4xx/5xx from the inference target; include the HTTP status and URL in the error message.
494
+ - CI hint: a tiny smoke run (2 seeds × 1 rollout) should see turns > 0 in healthy setups.
@@ -60,34 +60,55 @@ try:
60
60
  HAS_HOSTED = True
61
61
  except Exception:
62
62
  try: # pragma: no cover - optional dependency path
63
- from examples.warming_up_to_rl.task_app.synth_envs_hosted.branching import ( # type: ignore
64
- router as branching_router,
63
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.branching import ( # type: ignore
64
+ BranchingEnvironmentConfig,
65
65
  )
66
- from examples.warming_up_to_rl.task_app.synth_envs_hosted.environment_routes import ( # type: ignore # noqa: E501
67
- router as environment_router,
66
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.environment_routes import ( # type: ignore # noqa: E501
67
+ CrafterEnvironmentRoutes,
68
68
  )
69
- from examples.warming_up_to_rl.task_app.synth_envs_hosted.policy_routes import ( # type: ignore
70
- router as policy_router,
69
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.policy_routes import ( # type: ignore
70
+ PolicyRoutes,
71
71
  )
72
- from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import ( # type: ignore
72
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import ( # type: ignore
73
+ RolloutPayload,
74
+ )
75
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
76
+ EnvironmentConfig,
77
+ )
78
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
79
+ PolicyConfig,
80
+ )
81
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
82
+ RolloutRequest,
83
+ )
84
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
85
+ RolloutResponse,
86
+ )
87
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
88
+ RunSpec,
89
+ )
90
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
91
+ ToolUse,
92
+ )
93
+ from examples.task_apps.crafter.task_app.hosted.rollout import ( # type: ignore
73
94
  RolloutEnvSpec as LegacyRolloutEnvSpec,
74
95
  )
75
- from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
96
+ from examples.task_apps.crafter.task_app.hosted.rollout import (
76
97
  RolloutPolicySpec as LegacyRolloutPolicySpec,
77
98
  )
78
- from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
99
+ from examples.task_apps.crafter.task_app.hosted.rollout import (
79
100
  RolloutRecordConfig as LegacyRolloutRecordConfig,
80
101
  )
81
- from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
102
+ from examples.task_apps.crafter.task_app.hosted.rollout import (
82
103
  RolloutRequest as LegacyRolloutRequest,
83
104
  )
84
- from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
105
+ from examples.task_apps.crafter.task_app.hosted.rollout import (
85
106
  RolloutResponse as LegacyRolloutResponse,
86
107
  )
87
- from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
108
+ from examples.task_apps.crafter.task_app.hosted.rollout import (
88
109
  RolloutSafetyConfig as LegacyRolloutSafetyConfig,
89
110
  )
90
- from examples.warming_up_to_rl.task_app.synth_envs_hosted.rollout import (
111
+ from examples.task_apps.crafter.task_app.hosted.rollout import (
91
112
  execute_rollout as legacy_execute_rollout,
92
113
  )
93
114
  HAS_HOSTED = True
@@ -264,7 +285,7 @@ def build_dataset() -> tuple[TaskDatasetRegistry, MiniSweDataset]:
264
285
  def _base_task_info(dataset: MiniSweDataset) -> TaskInfo:
265
286
  return TaskInfo(
266
287
  task={"id": "swe_mini", "name": "mini-SWE Tasks", "version": "0.1.0"},
267
- environments=["swe-mini"],
288
+ environment="swe-mini",
268
289
  action_space={
269
290
  "type": "tool",
270
291
  "tools": ["run_command", "submit_patch"],
@@ -292,11 +313,6 @@ def _base_task_info(dataset: MiniSweDataset) -> TaskInfo:
292
313
  },
293
314
  "tool": {"name": "run_command", "parallel_tool_calls": False},
294
315
  },
295
- capabilities={
296
- "supports_rollout": True,
297
- "supports_env_lifecycle": True,
298
- "requires_api_key_header": True,
299
- },
300
316
  limits={"max_ops": 2000, "max_time_s": 7200},
301
317
  )
302
318
 
@@ -348,18 +364,31 @@ def provide_task_instances(
348
364
  dataset: MiniSweDataset, base_info: TaskInfo, seeds: Sequence[int]
349
365
  ) -> Iterable[TaskInfo]:
350
366
  infos: list[TaskInfo] = []
367
+ base_observation = getattr(base_info, "observation", None)
368
+ if hasattr(base_observation, "model_dump"):
369
+ base_observation_data = base_observation.model_dump()
370
+ elif isinstance(base_observation, dict):
371
+ base_observation_data = dict(base_observation)
372
+ else:
373
+ base_observation_data = {}
374
+
351
375
  for seed in seeds:
352
376
  instance = dataset.sample_by_index(int(seed))
353
377
  infos.append(
354
378
  TaskInfo(
355
379
  task=base_info.task,
356
- environments=base_info.environments,
380
+ environment=base_info.environment,
357
381
  action_space=base_info.action_space,
358
- observation={**base_info.observation, "instance_id": instance["instance_id"]},
359
- dataset={**base_info.dataset, "instance_id": instance["instance_id"]},
382
+ observation={
383
+ **base_observation_data,
384
+ "instance_id": instance["instance_id"],
385
+ },
386
+ dataset={
387
+ **base_info.dataset.model_dump(),
388
+ "instance_id": instance["instance_id"],
389
+ },
360
390
  rubric=base_info.rubric,
361
391
  inference=base_info.inference,
362
- capabilities=base_info.capabilities,
363
392
  limits=base_info.limits,
364
393
  )
365
394
  )
@@ -397,10 +426,10 @@ def build_config() -> TaskAppConfig:
397
426
  HostedTaskAppCls = HostedTaskApp
398
427
  except Exception:
399
428
  try:
400
- from examples.warming_up_to_rl.task_app.synth_envs_hosted.hosted_app import ( # type: ignore
401
- TaskApp as HostedTaskApp,
429
+ from examples.task_apps.crafter.task_app.synth_envs_hosted.hosted_app import ( # type: ignore
430
+ create_app,
402
431
  )
403
- HostedTaskAppCls = HostedTaskApp
432
+ HostedTaskAppCls = create_app
404
433
  except Exception as exc: # pragma: no cover - optional dependency path
405
434
  logger.warning("Unable to import HostedTaskApp for swe-mini: %s", exc)
406
435
  if HostedTaskAppCls is not None:
@@ -1238,6 +1238,15 @@ async def execute_rollout(
1238
1238
  )
1239
1239
 
1240
1240
  # Build partial trajectory and return HTTP 200
1241
+ # Extract inference_url from policy meta (best effort)
1242
+ inference_url = None
1243
+ if policy_handle is not None:
1244
+ try:
1245
+ policy_snapshot = policy_handle.snapshot()
1246
+ inference_url = policy_snapshot.get("config", {}).get("inference_url")
1247
+ except Exception:
1248
+ pass
1249
+
1241
1250
  trajectory = RolloutTrajectory(
1242
1251
  env_id=env_id,
1243
1252
  policy_id=policy_id,
@@ -1249,6 +1258,7 @@ async def execute_rollout(
1249
1258
  "at_op": op,
1250
1259
  },
1251
1260
  length=len(trajectory_steps),
1261
+ inference_url=inference_url, # NEW: Required for trace correlation
1252
1262
  decision_samples=decision_samples if step_rewards_active else None,
1253
1263
  )
1254
1264
  metrics = RolloutMetrics(
@@ -1369,6 +1379,15 @@ async def execute_rollout(
1369
1379
  },
1370
1380
  )
1371
1381
  trajectory_steps.append(term_step)
1382
+ # Extract inference_url from policy meta (best effort)
1383
+ inference_url = None
1384
+ if policy_handle is not None:
1385
+ try:
1386
+ policy_snapshot = policy_handle.snapshot()
1387
+ inference_url = policy_snapshot.get("config", {}).get("inference_url")
1388
+ except Exception:
1389
+ pass
1390
+
1372
1391
  trajectory = RolloutTrajectory(
1373
1392
  env_id=env_id,
1374
1393
  policy_id=policy_id,
@@ -1379,6 +1398,7 @@ async def execute_rollout(
1379
1398
  "at_op": op,
1380
1399
  },
1381
1400
  length=len(trajectory_steps),
1401
+ inference_url=inference_url, # NEW: Required for trace correlation
1382
1402
  decision_samples=decision_samples if step_rewards_active else None,
1383
1403
  )
1384
1404
  metrics = RolloutMetrics(
@@ -1460,6 +1480,15 @@ async def execute_rollout(
1460
1480
  )
1461
1481
  trajectory_steps.append(term_step)
1462
1482
  # Build partial response
1483
+ # Extract inference_url from policy meta (best effort)
1484
+ inference_url = None
1485
+ if policy_handle is not None:
1486
+ try:
1487
+ policy_snapshot = policy_handle.snapshot()
1488
+ inference_url = policy_snapshot.get("config", {}).get("inference_url")
1489
+ except Exception:
1490
+ pass
1491
+
1463
1492
  trajectory = RolloutTrajectory(
1464
1493
  env_id=env_id,
1465
1494
  policy_id=policy_id,
@@ -1471,6 +1500,7 @@ async def execute_rollout(
1471
1500
  "at_op": op,
1472
1501
  },
1473
1502
  length=len(trajectory_steps),
1503
+ inference_url=inference_url, # NEW: Required for trace correlation
1474
1504
  decision_samples=decision_samples if step_rewards_active else None,
1475
1505
  )
1476
1506
  metrics = RolloutMetrics(
@@ -1688,12 +1718,22 @@ async def execute_rollout(
1688
1718
  timing_final.setdefault("overhead_ms", 0.0)
1689
1719
 
1690
1720
  # Build trajectory
1721
+ # Extract inference_url from policy meta
1722
+ inference_url = None
1723
+ if policy_handle is not None:
1724
+ try:
1725
+ policy_snapshot = policy_handle.snapshot()
1726
+ inference_url = policy_snapshot.get("config", {}).get("inference_url")
1727
+ except Exception:
1728
+ pass
1729
+
1691
1730
  trajectory = RolloutTrajectory(
1692
1731
  env_id=env_id,
1693
1732
  policy_id=policy_id,
1694
1733
  steps=trajectory_steps,
1695
1734
  final={"observation": _summarize_observation_for_storage(env_handle, current_obs)},
1696
1735
  length=len(trajectory_steps),
1736
+ inference_url=inference_url, # NEW: Required for trace correlation
1697
1737
  decision_samples=decision_samples if step_rewards_active else None,
1698
1738
  )
1699
1739
 
@@ -1,15 +1,14 @@
1
1
  #!/usr/bin/env python3
2
- """
3
- Simple test script for the GRPO Synth Envs Hosted Service.
4
-
5
- Run this after starting the service with:
6
- python main.py
7
- """
2
+ """Manual smoke script for the GRPO Synth Envs Hosted Service."""
8
3
 
9
4
  import asyncio
10
5
  import json
11
6
 
12
7
  import httpx
8
+ import pytest
9
+
10
+
11
+ pytestmark = pytest.mark.skip(reason="Requires running hosted service on localhost:8000")
13
12
 
14
13
 
15
14
  async def test_service():