synth-ai 0.2.12__py3-none-any.whl → 0.2.13.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of synth-ai might be problematic.
- examples/multi_step/configs/crafter_rl_outcome.toml +74 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +186 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +83 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +78 -0
- examples/multi_step/crafter_rl_lora.md +51 -10
- examples/multi_step/sse_metrics_streaming_notes.md +357 -0
- examples/multi_step/task_app_config_notes.md +7 -1
- examples/swe/task_app/grpo_swe_mini.py +55 -26
- examples/swe/task_app/hosted/rollout.py +40 -0
- examples/swe/task_app/hosted/test_service.py +5 -6
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/__init__.py +0 -0
- examples/task_apps/crafter/__init__.py +0 -0
- examples/task_apps/crafter/task_app/__init__.py +2 -0
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +21 -46
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +60 -4
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +109 -45
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +67 -49
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +242 -193
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +2 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +2 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +177 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +2 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/math/__init__.py +0 -0
- examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/README.md +357 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +73 -0
- examples/task_apps/pokemon_red/task_app.py +606 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +191 -0
- examples/task_apps/sokoban/README.md +307 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +2 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +20 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +931 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +2 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +2 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +179 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +2 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/vlm/crafter_openai_vlm_agent.py +4 -4
- examples/vlm/run_crafter_vlm_benchmark.py +4 -4
- examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +4 -2
- examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +4 -2
- examples/warming_up_to_rl/run_eval.py +127 -18
- examples/workflows/__init__.py +0 -0
- examples/workflows/math_rl/__init__.py +0 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- synth_ai/__init__.py +41 -1
- synth_ai/api/train/builders.py +73 -29
- synth_ai/api/train/cli.py +12 -6
- synth_ai/api/train/configs/__init__.py +44 -0
- synth_ai/api/train/configs/rl.py +134 -0
- synth_ai/api/train/configs/sft.py +95 -0
- synth_ai/api/train/configs/shared.py +24 -0
- synth_ai/api/train/env_resolver.py +5 -2
- synth_ai/api/train/supported_algos.py +10 -5
- synth_ai/api/train/utils.py +7 -4
- synth_ai/cli/__init__.py +7 -51
- synth_ai/cli/_storage.py +4 -3
- synth_ai/cli/_validate_task_app.py +11 -0
- synth_ai/cli/balance.py +4 -3
- synth_ai/cli/calc.py +2 -2
- synth_ai/cli/demo.py +49 -43
- synth_ai/cli/legacy_root_backup.py +1 -1
- synth_ai/cli/rl_demo.py +86 -106
- synth_ai/cli/root.py +0 -97
- synth_ai/cli/task_apps.py +1710 -186
- synth_ai/demos/core/cli.py +121 -159
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +28 -16
- synth_ai/environments/examples/crafter_classic/environment.py +16 -0
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +27 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +60 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +30 -4
- synth_ai/evals/__init__.py +15 -0
- synth_ai/evals/client.py +82 -0
- synth_ai/evals/types.py +42 -0
- synth_ai/jobs/client.py +16 -4
- synth_ai/judge_schemas.py +127 -0
- synth_ai/py.typed +0 -0
- synth_ai/task/__init__.py +14 -5
- synth_ai/task/contracts.py +124 -38
- synth_ai/task/proxy.py +48 -56
- synth_ai/task/rubrics/__init__.py +53 -0
- synth_ai/task/rubrics/loaders.py +133 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +113 -0
- synth_ai/task/rubrics/strict.py +149 -0
- synth_ai/task/server.py +8 -7
- synth_ai/task/validators.py +269 -6
- synth_ai/tracing_v3/decorators.py +7 -3
- synth_ai/tracing_v3/replica_sync.py +4 -4
- synth_ai/tracing_v3/serialization.py +130 -0
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/native_manager.py +3 -3
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/METADATA +4 -1
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/RECORD +228 -89
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/entry_points.txt +0 -1
- synth_ai/task/rubrics.py +0 -219
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/utils.py +0 -0
- /examples/{rl/task_app → task_apps/math}/README.md +0 -0
- /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
- /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev2.dist-info}/top_level.txt +0 -0

Expanded diff: examples/multi_step/sse_metrics_streaming_notes.md (+357 -0, new file); full contents of the new file below.

# SSE metrics/event streaming design (RL + FT)

## Goals
- Near real-time push of job status, metrics, and logs during RL training, evaluation, and fine-tuning (FT)
- Single streaming endpoint per job, resumable (Last-Event-ID), low overhead, widely compatible (HTTP/1.1)
- Minimal client friction (CLI + Python helper), production-ready (auth, backpressure, rate limit)

## Non-goals
- Binary/frame multiplexing (use WebSocket if needed later)
- Arbitrary high-frequency payloads (we will coalesce/limit ~2–4 Hz for metrics)

---

## Endpoint
- Method: GET `/rl/jobs/{job_id}/stream`
- Headers:
  - Auth: `X-API-Key: <env key>` or `Authorization: Bearer <token>`
  - Cache: `Cache-Control: no-cache`
- Response:
  - Content-Type: `text/event-stream; charset=utf-8`
  - Transfer-Encoding: `chunked`
  - Connection: `keep-alive`
- Query params (optional):
  - `since_id`: int; resume from a specific event id (inclusive)
  - `types`: comma list `metric,status,log,artifact` (default: all)
  - `heartbeat`: seconds between heartbeats (default 20)
  - `split`: `train|eval` (filters metrics only)
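
For illustration only (not part of the packaged notes), a minimal sketch of consuming the endpoint described above with `httpx`; the URL shape, query params, and header names come from the spec above, everything else is an assumption:

```python
# Minimal SSE consumption sketch (assumes the endpoint spec above; not code from the package).
import json

import httpx


def stream_events(base_url: str, job_id: str, api_key: str, since_id: int | None = None):
    params = {"types": "metric,status"}
    if since_id is not None:
        params["since_id"] = since_id
    headers = {"X-API-Key": api_key, "Cache-Control": "no-cache"}
    with httpx.stream(
        "GET", f"{base_url}/rl/jobs/{job_id}/stream", headers=headers, params=params, timeout=None
    ) as resp:
        event, data = None, None
        for line in resp.iter_lines():
            if line.startswith(":"):            # heartbeat comment line
                continue
            if line.startswith("event: "):
                event = line[len("event: "):]
            elif line.startswith("data: "):
                data = json.loads(line[len("data: "):])
            elif line == "" and event is not None:
                yield event, data               # a blank line terminates one SSE message
                event, data = None, None
```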

## Event framing (SSE)
- Fields per message:
  - `id: <int>` monotonically increasing per job
  - `event: <status|metric|log|artifact|heartbeat>`
  - `data: <JSON>` single-line JSON (compact)
- Heartbeats: comment lines `: keep-alive` at configured interval
- Flush: after each event write + heartbeat
- Backpressure: if producer > consumer, coalesce metrics, keep status/logs, never buffer unbounded

## Payload schemas
- `status`
  - `{ "state": "queued|running|succeeded|failed|canceled", "step": 123, "epoch": 3, "phase": "train|eval|ft", "message": "...", "ts": 173.12 }`
- `metric`
  - `{ "name": "avg_reward|loss|accuracy|success_rate|return", "value": 0.123, "step": 123, "epoch": 3, "split": "train|eval", "window": 100, "mean": 0.42, "std": 0.08, "ts": 173.12 }`
  - Optional extras: `{ "tags": {"env": "crafter", "policy": "react"} }`
- `log`
  - `{ "level": "INFO|WARN|ERROR", "message": "...", "ts": 173.12 }`
- `artifact`
  - `{ "kind": "checkpoint|trace|plot|jsonl", "url": "/rl/jobs/{id}/artifacts/ckpt_0003.pt", "step": 123, "ts": 173.12 }`
- `heartbeat`
  - `{ "alive": true, "ts": 173.12 }`

### Example stream (illustrative)
```
id: 101
event: status
data: {"state":"running","phase":"train","step":820,"epoch":4,"ts":173.12}

id: 102
event: metric
data: {"name":"avg_reward","value":0.62,"step":820,"epoch":4,"split":"train","ts":173.13}

id: 103
event: metric
data: {"name":"loss","value":1.84,"step":820,"epoch":4,"split":"train","window":100,"mean":1.90,"std":0.15,"ts":173.13}

id: 104
event: log
data: {"level":"INFO","message":"checkpoint saved","ts":173.16}

id: 105
event: artifact
data: {"kind":"checkpoint","url":"/rl/jobs/j_abc/artifacts/ckpt_0004.pt","step":820,"ts":173.16}

: keep-alive
```

---

## Server architecture

### Components
- Event bus per `job_id` (async queue) where producers (RL, FT, evaluator) `emit(Event)`
- Ring buffer per job for replay (configurable: last N events OR last T minutes)
- SSE handler:
  1) Authenticate, pick job, determine resume cursor (`since_id` or `Last-Event-ID`)
  2) Replay from ring buffer >= cursor
  3) Attach to live queue; stream new events
  4) Emit heartbeats; close after terminal `status` + grace
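
As a rough illustration of that four-step handler flow (an editorial sketch under the design above, not code from the package; the `ring_buffer.events_after` helper and the `live_queue` object are assumptions):

```python
# Sketch of the SSE handler: replay from the ring buffer, then tail the live queue.
import asyncio
import json


async def sse_stream(job_id, ring_buffer, live_queue, cursor: int, heartbeat: float = 20.0):
    def frame(ev) -> str:
        return (
            f"id: {ev['id']}\nevent: {ev['type']}\n"
            f"data: {json.dumps(ev['data'], separators=(',', ':'))}\n\n"
        )

    # Steps 1-2: replay events recorded after the resume cursor (assumed ring-buffer API).
    for ev in ring_buffer.events_after(job_id, cursor):
        yield frame(ev)

    # Steps 3-4: live tail with heartbeats; stop after a terminal status event.
    while True:
        try:
            ev = await asyncio.wait_for(live_queue.get(), timeout=heartbeat)
        except asyncio.TimeoutError:
            yield ": keep-alive\n\n"
            continue
        yield frame(ev)
        if ev["type"] == "status" and ev["data"].get("state") in {"succeeded", "failed", "canceled"}:
            break
```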

### Concurrency & ordering
- Single writer increments `event_id`
- Replay preserves original order; live continues from last id
- If consumer slow: drop/coalesce metrics (preserve last per metric name), always deliver status/log/artifact

### Rate limiting & coalescing
- Default target 2–4 Hz for metrics per split
- Coalesce by metric name within a small interval (e.g., 250–500 ms)
- Status events limited to phase changes or every 5–10s
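
A minimal coalescer sketch for the keep-the-last-value-per-metric behaviour described in the list above (illustrative only; the class name is not from the package):

```python
# Keep only the latest value per metric name within a flush interval (~250-500 ms).
import time


class MetricCoalescer:
    def __init__(self, interval_s: float = 0.25):
        self.interval_s = interval_s
        self._latest: dict[str, dict] = {}
        self._last_flush = time.monotonic()

    def add(self, event: dict) -> list[dict]:
        self._latest[event["name"]] = event        # a newer value overwrites the older one
        now = time.monotonic()
        if now - self._last_flush >= self.interval_s:
            out = list(self._latest.values())
            self._latest.clear()
            self._last_flush = now
            return out                             # flush: at most one event per metric name
        return []
```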

### Auth & security
- Accept `X-API-Key` or `Authorization: Bearer`
- Validate job ownership/visibility
- CORS: allow EventSource; set `Access-Control-Allow-Origin` appropriately
- Timeouts: server idle timeout > heartbeat * 2; client reconnect on drop

### Config knobs (env)
- `SSE_HEARTBEAT_SECS` (default 20)
- `SSE_RING_BUFFER_EVENTS` (e.g., 2000) OR `SSE_RING_BUFFER_WINDOW_SECS` (e.g., 600)
- `SSE_MAX_METRIC_HZ` (e.g., 4)
- `SSE_MAX_CLIENTS_PER_JOB` (protect from fan-out)

---

## Emit points

### RL training/eval
- On train step end: `metric` avg_reward/return/success_rate; `status` every N steps
- On eval step end: `metric` eval_return/success_rate; `artifact` eval JSONL optional
- On checkpoint: `artifact` + `log`
- On phase transitions: `status` (train→eval, etc.)

### Fine-tuning (FT)
- On optimizer step: `metric` loss (and optional lr)
- On validation: `metric` val_loss/accuracy; optional `artifact` (curves)
- On checkpoint: `artifact` + `log`

---

## Client (synth-ai)

### CLI
- `synth-ai jobs stream <job_id> [--jsonl out.jsonl] [--types metric,status]`
- Prints compact lines: `t=18:22:40 step=820 avg_reward=0.62 loss=1.84`
- Writes raw events to JSONL if specified

### Python helper
```python
from synth_ai.client import TaskAppClient

with TaskAppClient(base_url, api_key) as c:
    for ev in c.stream_job(job_id, types=["metric","status"], since_id=None):
        handle(ev)
```
- Handles reconnect with `Last-Event-ID`
- Dedupes by event id; optional local aggregation windows
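
To make the reconnect/dedupe bullets concrete, a hedged sketch of a resume loop; `stream_job` is the helper from the snippet above, and both its acceptance of a `since_id` on reconnect and the event-as-dict shape are assumptions of these notes, not a documented API:

```python
# Reconnect-with-cursor sketch: resume from the last seen event id and skip duplicates.
import time


def stream_with_resume(client, job_id, max_retries: int = 5):
    last_id = None
    for attempt in range(max_retries):
        try:
            for ev in client.stream_job(job_id, types=["metric", "status"], since_id=last_id):
                if last_id is not None and ev["id"] <= last_id:
                    continue                       # drop events replayed after reconnect
                last_id = ev["id"]
                yield ev
            return                                 # stream ended after a terminal status
        except ConnectionError:
            time.sleep(min(2 ** attempt, 30))      # simple backoff before reconnecting
```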

---

## Error handling & edge cases
- 404 (job not found): immediate error, no stream
- 401/403: immediate error, no stream
- Producer stalls: heartbeats continue; client shows "connected: no new data"
- Ring buffer miss (client too far behind): send a summary `status` snapshot, then live only
- Buffer overflow: set `dropped_events: true` field in a `status` update

---

## Testing plan
- Unit: SSE formatting, ring buffer indexing, resume semantics, coalescing
- Integration: run toy RL + FT loops; assert ordered sequence and types
- Load: simulate 100 Hz metric emissions; verify rate limiting and non-blocking behavior
- Auth: invalid key, missing key, cross-job access rejection

---

## Rollout plan
1) Implement event bus + ring buffer per job
2) Add SSE route with auth, resume, heartbeat
3) Emitters in RL/FT loops (train/eval steps, loss/reward, checkpoints)
4) synth-ai client: CLI + Python helper; optional JSONL sink
5) Docs and example snippets (curl, Python, browser EventSource)

---

## Open questions / decisions
- Do we persist events beyond ring buffer? (optional artifact JSONL)
- Multi-tenant limits per org/user? (SSE_MAX_CLIENTS_PER_ORG?)
- Artifact URLs: signed vs. authenticated direct
- Backward compatibility with current polling endpoints

I'll scan the backend to find where RL jobs are created, status is served, and whether there's any existing streaming/event infrastructure to extend. Then I'll identify exact files/functions to change for adding an SSE endpoint and metric emitters.

[2 tools called]

Here's the exact backend surface you'll need to touch to add a single SSE stream for status + metrics, with where to hook metric emitters.

### Where jobs and events live
- Job and event tables (legacy RL path):
```1:20:monorepo/backend/app/db/models/jobs.py
"""Job orchestration models (RL/SFT shared).
"""
```
```31:58:monorepo/backend/app/db/models/jobs.py
class Job(Base):
    __tablename__ = "jobs"
    job_id = Column(String, primary_key=True)
    ...
    status = Column(String, nullable=False, default="QUEUED")
    ...
    last_seq = Column(BigInteger, nullable=True, default=0)
    events = relationship("JobEvent", back_populates="job", cascade="all, delete-orphan", passive_deletes=True)
```
```66:88:monorepo/backend/app/db/models/jobs.py
class JobEvent(Base):
    """Append-only job event log."""
    __tablename__ = "job_events"
    job_id = Column(String, ForeignKey("jobs.job_id", ondelete="CASCADE"), primary_key=True)
    seq = Column(BigInteger, primary_key=True)
    ts = Column(DateTime(timezone=True), server_default=func.now())
    type = Column(String, nullable=False)
    level = Column(String, nullable=False, default="info")
    message = Column(Text, nullable=False)
    data = Column(JSONB, nullable=True)
```

- DB repo (atomic seq increment + append):
```108:146:monorepo/backend/app/orchestration/jobs/repository_db.py
async def append_event(...):
    res = await self.session.execute(
        update(LearningJob)
        .where(LearningJob.job_id == job_id)
        .values(last_seq=func.coalesce(LearningJob.last_seq, 0) + 1, updated_at=func.now())
        .returning(LearningJob.last_seq)
    )
    seq = res.scalar_one()
    ev = JobEvent(job_id=job_id, seq=seq, type=type_, level=level, message=message, data=(data or {}))
    ...
```

- RL jobs REST (create + emit first events via PostgREST emitter):
```214:233:monorepo/backend/app/routes/clustered_training/core/routes.py
@router.post("/jobs", ...)
async def create_job(...):
    ...
```
```784:799:monorepo/backend/app/routes/clustered_training/core/routes.py
await get_postgrest_emitter().append_event(job_id=job_id, type_="rl.job.created", message="RL job created", data={"work": payload.work})
```

- Shared storage path showing how "update" and "append event" are broadcast via the PostgREST emitter:
```242:271:monorepo/backend/app/routes/simple_training/services/job_service.py
async def update_job(...): ...
async def append_job_event(...): ...
```

What to add/change (SSE + emitters)

1) Add SSE endpoint under RL jobs
- File: `monorepo/backend/app/routes/clustered_training/core/routes.py`
- New route: `GET /api/rl/jobs/{job_id}/stream`
- Auth: `ValidatedAPIKey`
- Headers: honor `Last-Event-ID`; Query: `since_id`, `types`, `heartbeat`
- Flow:
  - Validate job exists (use the DB-backed repo in production mode)
  - Determine start seq (from Last-Event-ID or since_id, else 0)
  - Replay: call repo `list_events(job_id, after=seq)` and stream as SSE (`id`, `event`, `data`)
  - Live tail: loop with short sleep (e.g., 0.5–1.0s) fetching new events by `after=last_seq`
  - Heartbeats as comments `: keep-alive` every N seconds
- Event mapping:
  - `JobEvent.type` prefixes map to SSE `event`:
    - `rl.job.*`, `job.updated` → `status`
    - `rl.step.metric`, `ft.step.metric`, `eval.metric` → `metric`
    - `system.log.*` → `log`
    - `artifact.*` → `artifact`
  - `Job.status` can be snapshotted once at connect (send a `status`)
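
For orientation only, a sketch of what such a route could look like; this is an editorial illustration of the flow above, not backend code from this diff, and names like `repo.list_events` are carried over from the notes as assumptions:

```python
# Editorial sketch of the proposed SSE route; repo.list_events and the type mapping
# follow the notes above and are assumptions, not code shipped in this package.
import asyncio
import json

from fastapi import APIRouter, Request
from fastapi.responses import StreamingResponse


def make_stream_router(repo) -> APIRouter:
    router = APIRouter()

    def map_type(event_type: str) -> str:
        # JobEvent.type prefix -> SSE event name (per the mapping table above).
        if event_type.endswith(".metric"):
            return "metric"
        if event_type.startswith("artifact."):
            return "artifact"
        if event_type.startswith("system.log"):
            return "log"
        return "status"

    @router.get("/rl/jobs/{job_id}/stream")
    async def stream_job(job_id: str, request: Request, since_id: int = 0, heartbeat: float = 20.0):
        cursor = int(request.headers.get("Last-Event-ID", since_id))

        async def gen():
            nonlocal cursor
            idle = 0.0
            while True:
                events = await repo.list_events(job_id, after=cursor)   # assumed repo API
                if events:
                    idle = 0.0
                    for ev in events:
                        cursor = ev.seq
                        yield (
                            f"id: {ev.seq}\nevent: {map_type(ev.type)}\n"
                            f"data: {json.dumps(ev.data or {})}\n\n"
                        )
                    continue
                await asyncio.sleep(1.0)                                # short live-tail poll
                idle += 1.0
                if idle >= heartbeat:
                    yield ": keep-alive\n\n"
                    idle = 0.0

        return StreamingResponse(gen(), media_type="text/event-stream")

    return router
```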

2) Ensure a consistent event source for reads
- Prefer the DB repo (`JobsRepositoryDB`) in prod mode. If the current code path uses the PostgREST emitter for appends, verify that the repo's `list_events` reads from the same canonical table (it does for `JobEvent`). If your RL path uses the "learning_shared" models instead, use the associated repository there (same pattern: list by job_id + seq).
- If you must keep PostgREST for append-only, that's fine; SSE can still read the DB rows inserted alongside (your outbox/emitter already supports both).

3) Emitters in training/FT loops
- File(s): `monorepo/backend/app/orchestration/hatchet/workflows.py` (RL workflow nodes), any FT job loops
- After each meaningful step:
  - Train: append `type="rl.step.metric"`, `data={"avg_reward":..., "return":..., "success_rate":..., "step":..., "epoch":..., "split":"train"}`.
  - Eval: `type="eval.metric"` with eval metrics and split.
  - FT: `type="ft.step.metric"`, `data={"loss":..., "lr":..., "step":..., "epoch":..., "split":"train"}`; validation as `split="eval"`.
  - On phase changes/checkpoints: `type="job.updated"` or `artifact.checkpoint` with URLs.
- Use the same helper used elsewhere:
```236:276:monorepo/backend/app/routes/simple_training/services/job_service.py
async def append_job_event(...): return await get_postgrest_emitter().append_event(...)
```
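
A hedged illustration of what one such emit might look like at a train-step boundary; `append_job_event` is the helper quoted just above, and its exact signature here is an assumption:

```python
# Illustrative emit at the end of an RL train step (append_job_event signature assumed).
async def emit_train_step_metrics(
    job_id: str, step: int, epoch: int, avg_reward: float, success_rate: float
) -> None:
    await append_job_event(
        job_id=job_id,
        type_="rl.step.metric",
        message="train step metrics",
        data={
            "avg_reward": avg_reward,
            "success_rate": success_rate,
            "step": step,
            "epoch": epoch,
            "split": "train",
        },
    )
```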

4) Optional shared service abstraction
- File: `monorepo/backend/app/routes/simple_training/services/storage_shared.py`
- Add a small `stream_job_events(job_id, after)` helper that wraps `repo.list_events(...)` and normalizes schemas (legacy vs learning_shared). The SSE route can call this.

5) Wire the router
- File: `monorepo/backend/app/routes/main.py`
- Include the new GET route (under the RL router you already mount).
- CORS: ensure EventSource allowed if frontend will use browser SSE.

Indexes/Perf you already have
- Jobs/events tables include per-job indexes and a `last_seq`; reads by `(job_id, seq)` are efficient:
```84:88:monorepo/backend/app/db/models/jobs.py
Index("idx_job_events_job_ts", "job_id", "ts"),
Index("idx_job_events_data_gin", "data", postgresql_using="gin"),
```
- For learning_shared events (if used), there are sequence indexes too:
```159:165:monorepo/backend/app/db/models/learning_shared.py
sa.Index("idx_learning_job_events_job_seq_idx", "job_id", "seq"),
```

Summary of minimal backend edits
- Add SSE route:
  - `monorepo/backend/app/routes/clustered_training/core/routes.py` (GET `/api/rl/jobs/{job_id}/stream`)
- Consume events via repo:
  - `monorepo/backend/app/orchestration/jobs/repository_db.py` (use `list_events`)
- Emit metrics from loops:
  - `monorepo/backend/app/orchestration/hatchet/workflows.py` (append_event at train/eval steps)
- Optional shared helper:
  - `monorepo/backend/app/routes/simple_training/services/storage_shared.py` (normalize event reads)
- Wireup:
  - `monorepo/backend/app/routes/main.py` (include SSE route)
- No schema changes required; you already have `JobEvent`/indexes and event append plumbing.

### SDK additions for great terminal polling (with SSE fallback)

- RlJobsApi extensions (synth_ai/jobs/client.py)
  - stream(job_id, since_id=None, types=None, heartbeat=None) -> async iterator of events (uses SSE; falls back to polling)
  - events(job_id, after=None, limit=500) -> list[JobEvent] (poll)
  - status(job_id) -> JobSummary (single snapshot)

- Event models (synth_ai/jobs/types.py)
  - JobEvent base: {id, type, level, message, data, ts}
  - StatusEvent, MetricEvent, LogEvent, ArtifactEvent (typed helpers)

- JobsWatcher helper (synth_ai/jobs/watcher.py)
  - constructor(client, job_id, interval=2.0, prefer_sse=True, jsonl_path=None, types=None)
  - run(on_event, stop_when=None) → handles SSE connect/reconnect, polling fallback, Last-Event-ID cursor, dedupe
  - metrics_tracker: rolling windows per metric name (mean/std/min/max, last_value, last_step)
  - backoff policy: jittered reconnect; rate limiter for render
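
As a rough sketch of the SSE-first/poll-fallback behaviour described for JobsWatcher above (editorial illustration; `client.stream(...)` and `client.events(...)` are the proposed RlJobsApi extensions, not an existing API):

```python
# SSE-first watcher loop with polling fallback; client.stream/client.events are the
# proposed extensions listed above and are assumed here, not an existing API.
import asyncio


async def watch(client, job_id: str, on_event, interval: float = 2.0, prefer_sse: bool = True):
    last_id = None
    if prefer_sse:
        try:
            async for ev in client.stream(job_id, since_id=last_id):
                last_id = ev.id
                on_event(ev)
            return                                 # stream closed after a terminal status
        except Exception:
            pass                                   # fall back to polling below
    while True:
        for ev in await client.events(job_id, after=last_id):
            last_id = ev.id
            on_event(ev)
        await asyncio.sleep(interval)
```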

- Terminal renderer (synth_ai/jobs/render.py)
  - RichRenderer (or minimal TTY): compact line updates: t=HH:MM:SS | step/E | key metrics (avg_reward, loss, val_loss, success_rate)
  - modes: one-line ticker vs. per-event lines; quiet mode; color by level/state
  - JSONL sink: raw event writes without printing prompts/payloads

- CLI command (synth_ai/api/train/cli.py)
  - synth-ai jobs watch <job_id> [--types metric,status] [--interval 2] [--jsonl out.jsonl] [--since-id N] [--no-sse]
  - exit codes: 0 on succeeded, 1 on failed/canceled, 2 on timeout

- Utilities (synth_ai/jobs/utils.py)
  - BackoffPolicy(retry, max) with jitter
  - EventCursor(last_id, update)
  - MetricsFormatter(map by job_type: RL vs FT metric labels)
  - Coalescer: compress frequent metrics to ≤4 Hz
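
For the BackoffPolicy item above, a minimal jittered-backoff sketch (illustrative only; not the package's implementation):

```python
# Exponential backoff with full jitter, capped at a maximum delay.
import random


class BackoffPolicy:
    def __init__(self, base_s: float = 1.0, max_s: float = 30.0):
        self.base_s = base_s
        self.max_s = max_s

    def delay(self, attempt: int) -> float:
        # attempt 0 -> up to base, attempt 1 -> up to 2*base, ... capped at max_s.
        return random.uniform(0, min(self.max_s, self.base_s * (2 ** attempt)))
```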

- Defaults/behavior
  - Prefer SSE; if 404/405/close → fallback to polling events() every interval
  - Heartbeat support; show "connected/no data" when only heartbeats
  - Resume: honor --since-id or Last-Event-ID; persist cursor optionally

- Minimal backend assumptions
  - GET /api/rl/jobs/{job_id}/stream (SSE) or /api/rl/jobs/{job_id}/events?after=… (poll)
  - Events include metric/status/log/artifact with seq ids and ts

- Extensibility
  - Plugin renderers per job_type (rl, sft/ft)
  - Hooks: on_status_change, on_metric(name, value), on_artifact(url)

Expanded diff: examples/multi_step/task_app_config_notes.md (+7 -1):

````diff
@@ -485,4 +485,10 @@ payload = {
 }
 ```
 
-Status: Notes committed on branch `friday-cleanup` and pushed.
+Status: Notes committed on branch `friday-cleanup` and pushed.
+
+### Operational guardrails
+
+- Treat avg_turns == 0 (or a high fraction of episodes with turns == 0) as a failure condition; exit non-zero.
+- Fail fast when the first policy step returns a 4xx/5xx from the inference target; include the HTTP status and URL in the error message.
+- CI hint: a tiny smoke run (2 seeds × 1 rollout) should see turns > 0 in healthy setups.
````
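
A minimal sketch of the first guardrail added above (editorial illustration only; the metric names and exit convention are assumptions drawn from the bullets, not code from the diff):

```python
# Fail the eval run when no agent turns were recorded (assumed metric names).
import sys


def enforce_turn_guardrail(avg_turns: float, zero_turn_fraction: float, max_zero_fraction: float = 0.5) -> None:
    if avg_turns == 0 or zero_turn_fraction > max_zero_fraction:
        print(
            f"guardrail: avg_turns={avg_turns}, zero-turn episodes={zero_turn_fraction:.0%}; "
            "treating run as failed",
            file=sys.stderr,
        )
        sys.exit(1)
```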

Expanded diff: examples/swe/task_app/grpo_swe_mini.py (+55 -26); removed import lines appear truncated ("from examples.") in the rendered diff and are kept as shown:

```diff
@@ -60,34 +60,55 @@ try:
     HAS_HOSTED = True
 except Exception:
     try:  # pragma: no cover - optional dependency path
-        from examples.
-
+        from examples.task_apps.crafter.task_app.synth_envs_hosted.branching import (  # type: ignore
+            BranchingEnvironmentConfig,
         )
-        from examples.
-
+        from examples.task_apps.crafter.task_app.synth_envs_hosted.environment_routes import (  # type: ignore  # noqa: E501
+            CrafterEnvironmentRoutes,
         )
-        from examples.
-
+        from examples.task_apps.crafter.task_app.synth_envs_hosted.policy_routes import (  # type: ignore
+            PolicyRoutes,
         )
-        from examples.
+        from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (  # type: ignore
+            RolloutPayload,
+        )
+        from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
+            EnvironmentConfig,
+        )
+        from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
+            PolicyConfig,
+        )
+        from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
+            RolloutRequest,
+        )
+        from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
+            RolloutResponse,
+        )
+        from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
+            RunSpec,
+        )
+        from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
+            ToolUse,
+        )
+        from examples.task_apps.crafter.task_app.hosted.rollout import (  # type: ignore
             RolloutEnvSpec as LegacyRolloutEnvSpec,
         )
-        from examples.
+        from examples.task_apps.crafter.task_app.hosted.rollout import (
             RolloutPolicySpec as LegacyRolloutPolicySpec,
         )
-        from examples.
+        from examples.task_apps.crafter.task_app.hosted.rollout import (
             RolloutRecordConfig as LegacyRolloutRecordConfig,
         )
-        from examples.
+        from examples.task_apps.crafter.task_app.hosted.rollout import (
             RolloutRequest as LegacyRolloutRequest,
         )
-        from examples.
+        from examples.task_apps.crafter.task_app.hosted.rollout import (
             RolloutResponse as LegacyRolloutResponse,
         )
-        from examples.
+        from examples.task_apps.crafter.task_app.hosted.rollout import (
             RolloutSafetyConfig as LegacyRolloutSafetyConfig,
         )
-        from examples.
+        from examples.task_apps.crafter.task_app.hosted.rollout import (
             execute_rollout as legacy_execute_rollout,
         )
         HAS_HOSTED = True

@@ -264,7 +285,7 @@ def build_dataset() -> tuple[TaskDatasetRegistry, MiniSweDataset]:
 def _base_task_info(dataset: MiniSweDataset) -> TaskInfo:
     return TaskInfo(
         task={"id": "swe_mini", "name": "mini-SWE Tasks", "version": "0.1.0"},
-
+        environment="swe-mini",
         action_space={
             "type": "tool",
             "tools": ["run_command", "submit_patch"],

@@ -292,11 +313,6 @@ def _base_task_info(dataset: MiniSweDataset) -> TaskInfo:
             },
             "tool": {"name": "run_command", "parallel_tool_calls": False},
         },
-        capabilities={
-            "supports_rollout": True,
-            "supports_env_lifecycle": True,
-            "requires_api_key_header": True,
-        },
         limits={"max_ops": 2000, "max_time_s": 7200},
     )
 

@@ -348,18 +364,31 @@ def provide_task_instances(
     dataset: MiniSweDataset, base_info: TaskInfo, seeds: Sequence[int]
 ) -> Iterable[TaskInfo]:
     infos: list[TaskInfo] = []
+    base_observation = getattr(base_info, "observation", None)
+    if hasattr(base_observation, "model_dump"):
+        base_observation_data = base_observation.model_dump()
+    elif isinstance(base_observation, dict):
+        base_observation_data = dict(base_observation)
+    else:
+        base_observation_data = {}
+
     for seed in seeds:
         instance = dataset.sample_by_index(int(seed))
         infos.append(
             TaskInfo(
                 task=base_info.task,
-
+                environment=base_info.environment,
                 action_space=base_info.action_space,
-                observation={
-
+                observation={
+                    **base_observation_data,
+                    "instance_id": instance["instance_id"],
+                },
+                dataset={
+                    **base_info.dataset.model_dump(),
+                    "instance_id": instance["instance_id"],
+                },
                 rubric=base_info.rubric,
                 inference=base_info.inference,
-                capabilities=base_info.capabilities,
                 limits=base_info.limits,
             )
         )

@@ -397,10 +426,10 @@ def build_config() -> TaskAppConfig:
             HostedTaskAppCls = HostedTaskApp
         except Exception:
             try:
-                from examples.
-
+                from examples.task_apps.crafter.task_app.synth_envs_hosted.hosted_app import (  # type: ignore
+                    create_app,
                 )
-                HostedTaskAppCls =
+                HostedTaskAppCls = create_app
             except Exception as exc:  # pragma: no cover - optional dependency path
                 logger.warning("Unable to import HostedTaskApp for swe-mini: %s", exc)
     if HostedTaskAppCls is not None:
```

Expanded diff: examples/swe/task_app/hosted/rollout.py (+40 -0):

```diff
@@ -1238,6 +1238,15 @@ async def execute_rollout(
     )
 
     # Build partial trajectory and return HTTP 200
+    # Extract inference_url from policy meta (best effort)
+    inference_url = None
+    if policy_handle is not None:
+        try:
+            policy_snapshot = policy_handle.snapshot()
+            inference_url = policy_snapshot.get("config", {}).get("inference_url")
+        except Exception:
+            pass
+
     trajectory = RolloutTrajectory(
         env_id=env_id,
         policy_id=policy_id,

@@ -1249,6 +1258,7 @@ async def execute_rollout(
             "at_op": op,
         },
         length=len(trajectory_steps),
+        inference_url=inference_url,  # NEW: Required for trace correlation
         decision_samples=decision_samples if step_rewards_active else None,
     )
     metrics = RolloutMetrics(

@@ -1369,6 +1379,15 @@ async def execute_rollout(
         },
     )
     trajectory_steps.append(term_step)
+    # Extract inference_url from policy meta (best effort)
+    inference_url = None
+    if policy_handle is not None:
+        try:
+            policy_snapshot = policy_handle.snapshot()
+            inference_url = policy_snapshot.get("config", {}).get("inference_url")
+        except Exception:
+            pass
+
     trajectory = RolloutTrajectory(
         env_id=env_id,
         policy_id=policy_id,

@@ -1379,6 +1398,7 @@ async def execute_rollout(
             "at_op": op,
         },
         length=len(trajectory_steps),
+        inference_url=inference_url,  # NEW: Required for trace correlation
         decision_samples=decision_samples if step_rewards_active else None,
     )
     metrics = RolloutMetrics(

@@ -1460,6 +1480,15 @@ async def execute_rollout(
     )
     trajectory_steps.append(term_step)
     # Build partial response
+    # Extract inference_url from policy meta (best effort)
+    inference_url = None
+    if policy_handle is not None:
+        try:
+            policy_snapshot = policy_handle.snapshot()
+            inference_url = policy_snapshot.get("config", {}).get("inference_url")
+        except Exception:
+            pass
+
     trajectory = RolloutTrajectory(
         env_id=env_id,
         policy_id=policy_id,

@@ -1471,6 +1500,7 @@ async def execute_rollout(
             "at_op": op,
         },
         length=len(trajectory_steps),
+        inference_url=inference_url,  # NEW: Required for trace correlation
         decision_samples=decision_samples if step_rewards_active else None,
     )
     metrics = RolloutMetrics(

@@ -1688,12 +1718,22 @@ async def execute_rollout(
     timing_final.setdefault("overhead_ms", 0.0)
 
     # Build trajectory
+    # Extract inference_url from policy meta
+    inference_url = None
+    if policy_handle is not None:
+        try:
+            policy_snapshot = policy_handle.snapshot()
+            inference_url = policy_snapshot.get("config", {}).get("inference_url")
+        except Exception:
+            pass
+
     trajectory = RolloutTrajectory(
         env_id=env_id,
         policy_id=policy_id,
         steps=trajectory_steps,
         final={"observation": _summarize_observation_for_storage(env_handle, current_obs)},
         length=len(trajectory_steps),
+        inference_url=inference_url,  # NEW: Required for trace correlation
         decision_samples=decision_samples if step_rewards_active else None,
     )
 
```
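
The same nine-line extraction block is added at four separate exit paths; a hedged editorial sketch of how it could be factored into one helper (not code from the diff; `policy_handle.snapshot()` is taken from the added lines above):

```python
# Possible refactor of the repeated block above into a single helper (editorial sketch).
from typing import Any, Optional


def _extract_inference_url(policy_handle: Any) -> Optional[str]:
    """Best-effort read of the policy's inference_url for trace correlation."""
    if policy_handle is None:
        return None
    try:
        snapshot = policy_handle.snapshot()
        return snapshot.get("config", {}).get("inference_url")
    except Exception:
        return None
```

Each call site would then reduce to `inference_url = _extract_inference_url(policy_handle)` before constructing the `RolloutTrajectory`.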

Expanded diff: examples/swe/task_app/hosted/test_service.py (+5 -6):

```diff
@@ -1,15 +1,14 @@
 #!/usr/bin/env python3
-"""
-Simple test script for the GRPO Synth Envs Hosted Service.
-
-Run this after starting the service with:
-    python main.py
-"""
+"""Manual smoke script for the GRPO Synth Envs Hosted Service."""
 
 import asyncio
 import json
 
 import httpx
+import pytest
+
+
+pytestmark = pytest.mark.skip(reason="Requires running hosted service on localhost:8000")
 
 
 async def test_service():
```
|