synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +17 -5
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +190 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -1
- examples/sft/evaluate.py +2 -0
- examples/sft/generate_traces.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +56 -26
- examples/swe/task_app/hosted/rollout.py +42 -0
- examples/swe/task_app/hosted/test_service.py +5 -6
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/__init__.py +0 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/__init__.py +0 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/__init__.py +5 -0
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +324 -21
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +76 -7
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +25 -3
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +77 -4
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +117 -9
- examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +218 -0
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +1 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +4 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +4 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +4 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/math/__init__.py +0 -0
- examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README.md +357 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
- examples/task_apps/pokemon_red/task_app.py +799 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
- examples/task_apps/sokoban/README.md +307 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +4 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +24 -0
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +4 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +4 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +4 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/vlm/crafter_openai_vlm_agent.py +4 -4
- examples/vlm/run_crafter_vlm_benchmark.py +4 -4
- examples/warming_up_to_rl/groq_test.py +2 -0
- examples/warming_up_to_rl/run_local_rollout.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
- examples/warming_up_to_rl/run_rollout_remote.py +2 -0
- examples/workflows/__init__.py +0 -0
- examples/workflows/math_rl/__init__.py +0 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- synth_ai/__init__.py +2 -2
- synth_ai/api/models/supported.py +1 -0
- synth_ai/api/train/builders.py +25 -11
- synth_ai/api/train/cli.py +12 -6
- synth_ai/api/train/configs/__init__.py +10 -10
- synth_ai/api/train/configs/rl.py +5 -4
- synth_ai/api/train/configs/sft.py +4 -3
- synth_ai/api/train/env_resolver.py +5 -2
- synth_ai/api/train/supported_algos.py +10 -5
- synth_ai/api/train/utils.py +7 -4
- synth_ai/cli/__init__.py +48 -59
- synth_ai/cli/_modal_wrapper.py +3 -2
- synth_ai/cli/_storage.py +4 -3
- synth_ai/cli/_validate_task_app.py +11 -0
- synth_ai/cli/balance.py +4 -3
- synth_ai/cli/calc.py +2 -2
- synth_ai/cli/demo.py +14 -7
- synth_ai/cli/legacy_root_backup.py +1 -1
- synth_ai/cli/recent.py +1 -1
- synth_ai/cli/rl_demo.py +8 -7
- synth_ai/cli/root.py +0 -97
- synth_ai/cli/status.py +1 -1
- synth_ai/cli/task_apps.py +1922 -190
- synth_ai/cli/traces.py +1 -1
- synth_ai/cli/tui.py +57 -0
- synth_ai/cli/turso.py +1 -1
- synth_ai/cli/watch.py +1 -1
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +29 -17
- synth_ai/environments/examples/crafter_classic/environment.py +1 -1
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +27 -0
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +60 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +104 -12
- synth_ai/evals/client.py +58 -61
- synth_ai/jobs/client.py +16 -4
- synth_ai/judge_schemas.py +9 -9
- synth_ai/py.typed +0 -0
- synth_ai/task/__init__.py +24 -5
- synth_ai/task/apps/__init__.py +1 -0
- synth_ai/task/config.py +257 -0
- synth_ai/task/contracts.py +138 -39
- synth_ai/task/proxy.py +48 -56
- synth_ai/task/rubrics/__init__.py +56 -0
- synth_ai/task/rubrics/loaders.py +152 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +116 -0
- synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
- synth_ai/task/server.py +8 -7
- synth_ai/task/trace_correlation_helpers.py +315 -0
- synth_ai/task/validators.py +413 -6
- synth_ai/tracing_v3/abstractions.py +3 -3
- synth_ai/tracing_v3/decorators.py +7 -3
- synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
- synth_ai/tracing_v3/replica_sync.py +4 -4
- synth_ai/tracing_v3/serialization.py +5 -5
- synth_ai/tracing_v3/session_tracer.py +16 -6
- synth_ai/tracing_v3/storage/base.py +29 -29
- synth_ai/tracing_v3/storage/config.py +3 -3
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/daemon.py +8 -7
- synth_ai/tracing_v3/turso/native_manager.py +66 -43
- synth_ai/tracing_v3/utils.py +3 -3
- synth_ai/tui/__init__.py +5 -0
- synth_ai/tui/__main__.py +13 -0
- synth_ai/tui/cli/__init__.py +1 -0
- synth_ai/tui/cli/query_experiments.py +164 -0
- synth_ai/tui/cli/query_experiments_v3.py +164 -0
- synth_ai/tui/dashboard.py +906 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/METADATA +4 -1
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/RECORD +278 -126
- examples/agora_ex/README_MoE.md +0 -224
- examples/agora_ex/__init__.py +0 -7
- examples/agora_ex/agora_ex.py +0 -65
- examples/agora_ex/agora_ex_task_app.py +0 -590
- examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
- examples/agora_ex/reward_fn_grpo-human.py +0 -129
- examples/agora_ex/system_prompt_CURRENT.md +0 -63
- examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
- examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
- examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +0 -62
- synth_ai/rubrics/__init__.py +0 -22
- synth_ai/task/rubrics.py +0 -219
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
- /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
- /examples/{rl/task_app → task_apps/math}/README.md +0 -0
- /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
- /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
- /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
- /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
|
|
2
|
+
|
|
3
|
+
Crafter
|
|
4
|
+
|
|
5
|
+
cd /Users/joshpurtell/Documents/GitHub/synth-ai && uvx synth-ai modal-serve grpo-crafter-task-app --name grpo-crafter-task-app --env-file /Users/joshpurtell/Documents/GitHub/monorepo/environments/crafter/.env
|
|
6
|
+
|
|
7
|
+
cd /Users/joshpurtell/Documents/GitHub/monorepo && uv run modal deploy backend/app/routes/clustered_training/core/algorithms/gspo/app.py --env dev
|
|
8
|
+
|
|
9
|
+
uvx synth-ai eval --config /Users/joshpurtell/Documents/GitHub/synth-ai/examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
uvx synth-ai train \
|
|
13
|
+
--type rl \
|
|
14
|
+
--config /Users/joshpurtell/Documents/GitHub/synth-ai/examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml \
|
|
15
|
+
--task-url https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run \
|
|
16
|
+
--backend https://synth-backend-dev-docker.onrender.com/api \
|
|
17
|
+
--env-file /Users/joshpurtell/Documents/GitHub/monorepo/environments/crafter/.env
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
---
|
|
22
|
+
|
|
23
|
+
Verilog
|
|
24
|
+
|
|
25
|
+
# 1. Deploy Verilog task app
|
|
26
|
+
cd /Users/joshpurtell/Documents/GitHub/synth-ai && uvx synth-ai modal-serve grpo-verilog --name grpo-verilog-task-app --env-file /Users/joshpurtell/Documents/GitHub/monorepo/environments/verilog/.env
|
|
27
|
+
|
|
28
|
+
# 2. Baseline eval using Synth backend (pre-training)
|
|
29
|
+
uvx synth-ai eval --config /Users/joshpurtell/Documents/GitHub/synth-ai/examples/multi_step/configs/verilog_eval_synth_qwen8b.toml
|
|
30
|
+
|
|
31
|
+
# 3. (Optional) External reference eval using Groq Qwen 32B
|
|
32
|
+
uvx synth-ai eval --config /Users/joshpurtell/Documents/GitHub/synth-ai/examples/multi_step/configs/verilog_eval_groq_qwen32b.toml
|
|
33
|
+
|
|
34
|
+
# 4. Deploy training backend
|
|
35
|
+
cd /Users/joshpurtell/Documents/GitHub/monorepo && uv run modal deploy backend/app/routes/clustered_training/core/algorithms/gspo/app.py --env dev
|
|
36
|
+
|
|
37
|
+
# 5. Run RL training
|
|
38
|
+
uvx synth-ai train \
|
|
39
|
+
--type rl \
|
|
40
|
+
--config /Users/joshpurtell/Documents/GitHub/synth-ai/examples/multi_step/configs/verilog_rl_lora.toml \
|
|
41
|
+
--task-url https://synth-laboratories--grpo-verilog-task-app-fastapi-app-dev.modal.run \
|
|
42
|
+
--backend https://synth-backend-dev-docker.onrender.com/api \
|
|
43
|
+
--env-file /Users/joshpurtell/Documents/GitHub/monorepo/environments/verilog/.env
|
|
44
|
+
|
|
45
|
+
# 6. Post-training eval (update job_id in config first!)
|
|
46
|
+
# After training, note the job_id from logs (e.g., job_19a1823e56303de604f)
|
|
47
|
+
# Update verilog_eval_synth_trained_qwen8b.toml with your job_id
|
|
48
|
+
uvx synth-ai eval --config /Users/joshpurtell/Documents/GitHub/synth-ai/examples/multi_step/configs/verilog_eval_synth_trained_qwen8b.toml
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
# Verilog RL with LoRA Analysis
|
|
2
|
+
|
|
3
|
+
## Executive Summary
|
|
4
|
+
|
|
5
|
+
**✅ YES, Verilog can absolutely do RL with LoRA just like Crafter!** The architecture is nearly identical, but there are important considerations around model size and task complexity.
|
|
6
|
+
|
|
7
|
+
## Architecture Compatibility ✅
|
|
8
|
+
|
|
9
|
+
### **Same Foundation** (No changes needed)
|
|
10
|
+
- ✅ **Contracts**: Uses identical `RolloutRequest`/`RolloutResponse` as Crafter
|
|
11
|
+
- ✅ **Task App Framework**: Same `synth_ai.task.apps` framework
|
|
12
|
+
- ✅ **Environment Pattern**: Same `StatefulEnvironment` + tool-based architecture
|
|
13
|
+
- ✅ **Rubrics System**: Same evaluation and reward system
|
|
14
|
+
- ✅ **Trace Correlation**: Already implemented in `rollout_executor` (line 817 in `grpo_verilog.py`)
|
|
15
|
+
- ✅ **Modal Deployment**: Same deployment pattern as Crafter
|
|
16
|
+
|
|
17
|
+
### **Key Differences** (Considerations for LoRA)
|
|
18
|
+
|
|
19
|
+
#### 1. **Model Size: 8x Larger** ⚠️
|
|
20
|
+
```toml
|
|
21
|
+
# Verilog (current)
|
|
22
|
+
model = "qwen/qwen3-32b" # 32B parameters
|
|
23
|
+
|
|
24
|
+
# Crafter (working)
|
|
25
|
+
model = "Qwen/Qwen3-4B" # 4B parameters
|
|
26
|
+
```
|
|
27
|
+
**Impact**: Memory requirements 8x higher for LoRA training
|
|
28
|
+
**Solution**: Use gradient checkpointing, smaller batch sizes, or distributed training
|
|
29
|
+
|
|
30
|
+
#### 2. **Tool Set: Simpler but More Structured**
|
|
31
|
+
```python
|
|
32
|
+
# Verilog Tools (4 tools)
|
|
33
|
+
TOOLS = ["write_file", "compile", "simulate", "submit"]
|
|
34
|
+
|
|
35
|
+
# Crafter Tools (20+ tools)
|
|
36
|
+
# craft, move, attack, gather, etc.
|
|
37
|
+
```
|
|
38
|
+
|
|
39
|
+
**Verilog Advantages**:
|
|
40
|
+
- ✅ **Deterministic**: Write → Compile → Simulate → Submit workflow
|
|
41
|
+
- ✅ **Clear Success Criteria**: Tests pass = high reward
|
|
42
|
+
- ✅ **Sparse but Meaningful Rewards**: +10 for submit success, +1 for simulation pass
|
|
43
|
+
|
|
44
|
+
**Verilog Challenges**:
|
|
45
|
+
- ❌ **Sparser Rewards**: Fewer intermediate signals for learning
|
|
46
|
+
- ❌ **Longer Sequences**: Multi-step compilation chains
|
|
47
|
+
- ❌ **Error Recovery**: Must debug compilation failures
|
|
48
|
+
|
|
49
|
+
#### 3. **State Representation**
|
|
50
|
+
```python
|
|
51
|
+
# Verilog State (file-based)
|
|
52
|
+
{
|
|
53
|
+
"files": {"TopModule.v": "module TopModule(..."},
|
|
54
|
+
"compile_status": "Last compile: Success",
|
|
55
|
+
"simulate_status": "Last simulation: Passed",
|
|
56
|
+
"task_completed": false
|
|
57
|
+
}
|
|
58
|
+
|
|
59
|
+
# Crafter State (world-based)
|
|
60
|
+
{
|
|
61
|
+
"inventory": {"wood": 5, "stone": 3},
|
|
62
|
+
"position": [x, y],
|
|
63
|
+
"nearby_entities": [...],
|
|
64
|
+
"achievement_unlocked": true
|
|
65
|
+
}
|
|
66
|
+
```
|
|
67
|
+
|
|
68
|
+
## Configuration for LoRA RL
|
|
69
|
+
|
|
70
|
+
### **Option 1: Qwen3-0.6B (Recommended for testing)** ⭐
|
|
71
|
+
```toml
|
|
72
|
+
[algorithm]
|
|
73
|
+
type = "online"
|
|
74
|
+
method = "policy_gradient"
|
|
75
|
+
variety = "gspo"
|
|
76
|
+
|
|
77
|
+
[model]
|
|
78
|
+
base = "Qwen/Qwen3-0.6B" # ✅ Same as existing SFT configs
|
|
79
|
+
trainer_mode = "lora"
|
|
80
|
+
|
|
81
|
+
[lora]
|
|
82
|
+
r = 16
|
|
83
|
+
alpha = 32
|
|
84
|
+
dropout = 0.05
|
|
85
|
+
target_modules = ["all-linear"]
|
|
86
|
+
|
|
87
|
+
[rollout]
|
|
88
|
+
env_name = "verilog"
|
|
89
|
+
max_turns = 15
|
|
90
|
+
policy_name = "verilog-designer"
|
|
91
|
+
|
|
92
|
+
[training]
|
|
93
|
+
batch_size = 4 # ✅ Same as Crafter
|
|
94
|
+
gradient_accumulation_steps = 1
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
### **Option 2: Qwen3-32B (Production)** ⚠️
|
|
98
|
+
```toml
|
|
99
|
+
[algorithm]
|
|
100
|
+
type = "online"
|
|
101
|
+
method = "policy_gradient"
|
|
102
|
+
variety = "gspo"
|
|
103
|
+
|
|
104
|
+
[model]
|
|
105
|
+
base = "qwen/qwen3-32b" # ⚠️ 8x memory vs Crafter's 4B
|
|
106
|
+
trainer_mode = "lora"
|
|
107
|
+
|
|
108
|
+
[lora]
|
|
109
|
+
r = 16
|
|
110
|
+
alpha = 32
|
|
111
|
+
dropout = 0.05
|
|
112
|
+
target_modules = ["all-linear"]
|
|
113
|
+
|
|
114
|
+
[rollout]
|
|
115
|
+
env_name = "verilog"
|
|
116
|
+
max_turns = 15
|
|
117
|
+
policy_name = "verilog-designer"
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
### **Memory Optimization** (for 32B model)
|
|
121
|
+
```toml
|
|
122
|
+
[vllm]
|
|
123
|
+
max_model_len = 4096 # Shorter than Crafter's 8192
|
|
124
|
+
tensor_parallel_size = 2 # Distribute across GPUs
|
|
125
|
+
|
|
126
|
+
[training]
|
|
127
|
+
batch_size = 2 # Smaller than Crafter's 4
|
|
128
|
+
gradient_accumulation_steps = 4
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
## Task App Changes Needed
|
|
132
|
+
|
|
133
|
+
### **1. Mode Parameter Support** ✅ (Already implemented)
|
|
134
|
+
The Verilog task app already handles `mode="rl"` correctly:
|
|
135
|
+
```python
|
|
136
|
+
# In grpo_verilog.py rollout_executor
|
|
137
|
+
policy_config = dict(policy_config_raw)
|
|
138
|
+
# ... mode parameter flows through naturally
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
### **2. Trace Correlation** ✅ (Already implemented)
|
|
142
|
+
```python
|
|
143
|
+
# Line 817 in grpo_verilog.py
|
|
144
|
+
trajectory = RolloutTrajectory(
|
|
145
|
+
# ...
|
|
146
|
+
inference_url=agent.inference_url, # ✅ Required for trace correlation
|
|
147
|
+
decision_samples=None,
|
|
148
|
+
)
|
|
149
|
+
```
|
|
150
|
+
|
|
151
|
+
### **3. Rubric Integration** ✅ (Already configured)
|
|
152
|
+
```python
|
|
153
|
+
# In grpo_verilog.py
|
|
154
|
+
rubrics=RubricBundle(
|
|
155
|
+
outcome=OUTCOME_RUBRIC, # Tests pass reward
|
|
156
|
+
events=EVENTS_RUBRIC, # Process efficiency reward
|
|
157
|
+
)
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
## RL Training Feasibility
|
|
161
|
+
|
|
162
|
+
### **✅ Works Great**
|
|
163
|
+
1. **Clear Success Signal**: Submit passing tests = +10 reward
|
|
164
|
+
2. **Guided Process**: Natural write→compile→simulate→submit progression
|
|
165
|
+
3. **Error Learning**: Agent must learn to debug compilation failures
|
|
166
|
+
4. **Hardware Design**: Real-world applicable skills
|
|
167
|
+
|
|
168
|
+
### **⚠️ Challenges**
|
|
169
|
+
1. **Model Size**: 32B vs 4B = 8x memory, slower training
|
|
170
|
+
2. **Sparse Rewards**: Fewer learning signals than Crafter's dense rewards
|
|
171
|
+
3. **Longer Episodes**: 15+ steps vs Crafter's 10 steps
|
|
172
|
+
4. **Compilation Errors**: Must learn to interpret and fix syntax errors
|
|
173
|
+
|
|
174
|
+
## Recommended Approach
|
|
175
|
+
|
|
176
|
+
### **Phase 1: Start with Qwen3-0.6B** ⭐ (as you requested)
|
|
177
|
+
```toml
|
|
178
|
+
# Perfect for testing - same model used in existing SFT configs
|
|
179
|
+
model = "Qwen/Qwen3-0.6B"
|
|
180
|
+
batch_size = 4 # Same as Crafter
|
|
181
|
+
```
|
|
182
|
+
- ✅ **Zero setup**: Already configured in `synth-ai/examples/sft/configs/crafter_lora_qwen0p6b.toml`
|
|
183
|
+
- ✅ **Fast iteration**: 0.6B parameters = quick training cycles
|
|
184
|
+
- ✅ **Memory efficient**: Fits on single GPU easily
|
|
185
|
+
- ✅ **Proven baseline**: Same model used in RL demos and SFT examples
|
|
186
|
+
|
|
187
|
+
### **Phase 2: Scale to Qwen3-8B** (if 0.6B works well)
|
|
188
|
+
```toml
|
|
189
|
+
model = "qwen/qwen3-8b"
|
|
190
|
+
batch_size = 2
|
|
191
|
+
gradient_accumulation_steps = 2
|
|
192
|
+
```
|
|
193
|
+
|
|
194
|
+
### **Phase 3: Production with Qwen3-32B**
|
|
195
|
+
```toml
|
|
196
|
+
model = "qwen/qwen3-32b"
|
|
197
|
+
tensor_parallel_size = 2
|
|
198
|
+
batch_size = 1
|
|
199
|
+
gradient_accumulation_steps = 4
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
### **Phase 4: Optimize for Verilog Domain**
|
|
203
|
+
Consider fine-tuning the base model on:
|
|
204
|
+
- Verilog syntax and semantics
|
|
205
|
+
- Hardware design patterns
|
|
206
|
+
- Compilation error messages
|
|
207
|
+
- Testbench writing
|
|
208
|
+
|
|
209
|
+
## Conclusion
|
|
210
|
+
|
|
211
|
+
**✅ Verilog RL with LoRA is absolutely feasible** and should work with the same pipeline as Crafter. The main differences are:
|
|
212
|
+
|
|
213
|
+
1. **Larger model** (32B vs 4B) requires memory optimization
|
|
214
|
+
2. **Sparser rewards** may need different reward shaping
|
|
215
|
+
3. **More structured tasks** could actually make learning easier
|
|
216
|
+
4. **Real hardware skills** make it more valuable than game tasks
|
|
217
|
+
|
|
218
|
+
**Recommended next step**: Create a `verilog_rl_lora.toml` config starting with Qwen3-0.6B (scaling up to Qwen3-8B once the pipeline works end to end) and adapt the reward rubrics for the compilation workflow.
|
examples/sft/evaluate.py
CHANGED
|
@@ -44,6 +44,7 @@ def _ops(n: int) -> list[str]:
|
|
|
44
44
|
|
|
45
45
|
|
|
46
46
|
def _request(seed: int, a: EvalArgs) -> RolloutRequest:
|
|
47
|
+
from synth_ai.task.contracts import RolloutMode
|
|
47
48
|
return RolloutRequest(
|
|
48
49
|
run_id=f"eval-{seed}",
|
|
49
50
|
env=RolloutEnvSpec(env_name="crafter", seed=seed, config={}),
|
|
@@ -53,6 +54,7 @@ def _request(seed: int, a: EvalArgs) -> RolloutRequest:
|
|
|
53
54
|
),
|
|
54
55
|
ops=_ops(a.max_llm_calls),
|
|
55
56
|
record=RolloutRecordConfig(trajectories=True, return_trace=False, trace_format="compact"),
|
|
57
|
+
mode=RolloutMode.EVAL,
|
|
56
58
|
)
|
|
57
59
|
|
|
58
60
|
|
examples/sft/generate_traces.py
CHANGED
|
@@ -42,6 +42,7 @@ def _build_ops(max_llm_calls: int) -> list[str]:
|
|
|
42
42
|
|
|
43
43
|
|
|
44
44
|
def _build_request(seed: int, run_id: str, model: str, inference_url: str, api_key: str, *, max_llm_calls: int, return_trace: bool) -> RolloutRequest:
|
|
45
|
+
from synth_ai.task.contracts import RolloutMode
|
|
45
46
|
policy_cfg: dict[str, Any] = {
|
|
46
47
|
"model": model,
|
|
47
48
|
"inference_url": inference_url,
|
|
@@ -54,6 +55,7 @@ def _build_request(seed: int, run_id: str, model: str, inference_url: str, api_k
|
|
|
54
55
|
policy=RolloutPolicySpec(policy_name="crafter-react", config=policy_cfg),
|
|
55
56
|
ops=_build_ops(max_llm_calls),
|
|
56
57
|
record=record,
|
|
58
|
+
mode=RolloutMode.EVAL,
|
|
57
59
|
)
|
|
58
60
|
|
|
59
61
|
|
|
@@ -60,34 +60,55 @@ try:
|
|
|
60
60
|
HAS_HOSTED = True
|
|
61
61
|
except Exception:
|
|
62
62
|
try: # pragma: no cover - optional dependency path
|
|
63
|
-
from examples.
|
|
64
|
-
|
|
63
|
+
from examples.task_apps.crafter.task_app.synth_envs_hosted.branching import ( # type: ignore
|
|
64
|
+
BranchingEnvironmentConfig,
|
|
65
65
|
)
|
|
66
|
-
from examples.
|
|
67
|
-
|
|
66
|
+
from examples.task_apps.crafter.task_app.synth_envs_hosted.environment_routes import ( # type: ignore # noqa: E501
|
|
67
|
+
CrafterEnvironmentRoutes,
|
|
68
68
|
)
|
|
69
|
-
from examples.
|
|
70
|
-
|
|
69
|
+
from examples.task_apps.crafter.task_app.synth_envs_hosted.policy_routes import ( # type: ignore
|
|
70
|
+
PolicyRoutes,
|
|
71
71
|
)
|
|
72
|
-
from examples.
|
|
72
|
+
from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import ( # type: ignore
|
|
73
|
+
RolloutPayload,
|
|
74
|
+
)
|
|
75
|
+
from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
|
|
76
|
+
EnvironmentConfig,
|
|
77
|
+
)
|
|
78
|
+
from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
|
|
79
|
+
PolicyConfig,
|
|
80
|
+
)
|
|
81
|
+
from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
|
|
82
|
+
RolloutRequest,
|
|
83
|
+
)
|
|
84
|
+
from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
|
|
85
|
+
RolloutResponse,
|
|
86
|
+
)
|
|
87
|
+
from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
|
|
88
|
+
RunSpec,
|
|
89
|
+
)
|
|
90
|
+
from examples.task_apps.crafter.task_app.synth_envs_hosted.rollout import (
|
|
91
|
+
ToolUse,
|
|
92
|
+
)
|
|
93
|
+
from examples.task_apps.crafter.task_app.hosted.rollout import ( # type: ignore
|
|
73
94
|
RolloutEnvSpec as LegacyRolloutEnvSpec,
|
|
74
95
|
)
|
|
75
|
-
from examples.
|
|
96
|
+
from examples.task_apps.crafter.task_app.hosted.rollout import (
|
|
76
97
|
RolloutPolicySpec as LegacyRolloutPolicySpec,
|
|
77
98
|
)
|
|
78
|
-
from examples.
|
|
99
|
+
from examples.task_apps.crafter.task_app.hosted.rollout import (
|
|
79
100
|
RolloutRecordConfig as LegacyRolloutRecordConfig,
|
|
80
101
|
)
|
|
81
|
-
from examples.
|
|
102
|
+
from examples.task_apps.crafter.task_app.hosted.rollout import (
|
|
82
103
|
RolloutRequest as LegacyRolloutRequest,
|
|
83
104
|
)
|
|
84
|
-
from examples.
|
|
105
|
+
from examples.task_apps.crafter.task_app.hosted.rollout import (
|
|
85
106
|
RolloutResponse as LegacyRolloutResponse,
|
|
86
107
|
)
|
|
87
|
-
from examples.
|
|
108
|
+
from examples.task_apps.crafter.task_app.hosted.rollout import (
|
|
88
109
|
RolloutSafetyConfig as LegacyRolloutSafetyConfig,
|
|
89
110
|
)
|
|
90
|
-
from examples.
|
|
111
|
+
from examples.task_apps.crafter.task_app.hosted.rollout import (
|
|
91
112
|
execute_rollout as legacy_execute_rollout,
|
|
92
113
|
)
|
|
93
114
|
HAS_HOSTED = True
|
|
@@ -264,7 +285,7 @@ def build_dataset() -> tuple[TaskDatasetRegistry, MiniSweDataset]:
|
|
|
264
285
|
def _base_task_info(dataset: MiniSweDataset) -> TaskInfo:
|
|
265
286
|
return TaskInfo(
|
|
266
287
|
task={"id": "swe_mini", "name": "mini-SWE Tasks", "version": "0.1.0"},
|
|
267
|
-
|
|
288
|
+
environment="swe-mini",
|
|
268
289
|
action_space={
|
|
269
290
|
"type": "tool",
|
|
270
291
|
"tools": ["run_command", "submit_patch"],
|
|
@@ -292,11 +313,6 @@ def _base_task_info(dataset: MiniSweDataset) -> TaskInfo:
|
|
|
292
313
|
},
|
|
293
314
|
"tool": {"name": "run_command", "parallel_tool_calls": False},
|
|
294
315
|
},
|
|
295
|
-
capabilities={
|
|
296
|
-
"supports_rollout": True,
|
|
297
|
-
"supports_env_lifecycle": True,
|
|
298
|
-
"requires_api_key_header": True,
|
|
299
|
-
},
|
|
300
316
|
limits={"max_ops": 2000, "max_time_s": 7200},
|
|
301
317
|
)
|
|
302
318
|
|
|
@@ -348,18 +364,31 @@ def provide_task_instances(
|
|
|
348
364
|
dataset: MiniSweDataset, base_info: TaskInfo, seeds: Sequence[int]
|
|
349
365
|
) -> Iterable[TaskInfo]:
|
|
350
366
|
infos: list[TaskInfo] = []
|
|
367
|
+
base_observation = getattr(base_info, "observation", None)
|
|
368
|
+
if hasattr(base_observation, "model_dump"):
|
|
369
|
+
base_observation_data = base_observation.model_dump()
|
|
370
|
+
elif isinstance(base_observation, dict):
|
|
371
|
+
base_observation_data = dict(base_observation)
|
|
372
|
+
else:
|
|
373
|
+
base_observation_data = {}
|
|
374
|
+
|
|
351
375
|
for seed in seeds:
|
|
352
376
|
instance = dataset.sample_by_index(int(seed))
|
|
353
377
|
infos.append(
|
|
354
378
|
TaskInfo(
|
|
355
379
|
task=base_info.task,
|
|
356
|
-
|
|
380
|
+
environment=base_info.environment,
|
|
357
381
|
action_space=base_info.action_space,
|
|
358
|
-
observation={
|
|
359
|
-
|
|
382
|
+
observation={
|
|
383
|
+
**base_observation_data,
|
|
384
|
+
"instance_id": instance["instance_id"],
|
|
385
|
+
},
|
|
386
|
+
dataset={
|
|
387
|
+
**base_info.dataset.model_dump(),
|
|
388
|
+
"instance_id": instance["instance_id"],
|
|
389
|
+
},
|
|
360
390
|
rubric=base_info.rubric,
|
|
361
391
|
inference=base_info.inference,
|
|
362
|
-
capabilities=base_info.capabilities,
|
|
363
392
|
limits=base_info.limits,
|
|
364
393
|
)
|
|
365
394
|
)
|
|
@@ -397,10 +426,10 @@ def build_config() -> TaskAppConfig:
|
|
|
397
426
|
HostedTaskAppCls = HostedTaskApp
|
|
398
427
|
except Exception:
|
|
399
428
|
try:
|
|
400
|
-
from examples.
|
|
401
|
-
|
|
429
|
+
from examples.task_apps.crafter.task_app.synth_envs_hosted.hosted_app import ( # type: ignore
|
|
430
|
+
create_app,
|
|
402
431
|
)
|
|
403
|
-
HostedTaskAppCls =
|
|
432
|
+
HostedTaskAppCls = create_app
|
|
404
433
|
except Exception as exc: # pragma: no cover - optional dependency path
|
|
405
434
|
logger.warning("Unable to import HostedTaskApp for swe-mini: %s", exc)
|
|
406
435
|
if HostedTaskAppCls is not None:
|
|
@@ -455,6 +484,7 @@ def build_config() -> TaskAppConfig:
|
|
|
455
484
|
|
|
456
485
|
legacy_request = LegacyRolloutRequest(
|
|
457
486
|
run_id=request.run_id,
|
|
487
|
+
mode=request.mode, # Preserve mode for nested requests
|
|
458
488
|
env=LegacyRolloutEnvSpec(
|
|
459
489
|
env_id=request.env.env_id,
|
|
460
490
|
env_name=env_spec.env_name or "swe-mini",
|
|
@@ -12,6 +12,7 @@ from fastapi import APIRouter, HTTPException, Request, status
|
|
|
12
12
|
from pydantic import BaseModel
|
|
13
13
|
from synth_ai.lm.vendors.base import BaseLMResponse
|
|
14
14
|
from synth_ai.task.tracing_utils import unique_sft_path
|
|
15
|
+
from synth_ai.task.contracts import RolloutMode
|
|
15
16
|
from synth_ai.tracing_v3.abstractions import EnvironmentEvent, LMCAISEvent, TimeRecord
|
|
16
17
|
from synth_ai.tracing_v3.llm_call_record_helpers import create_llm_call_record_from_response
|
|
17
18
|
from synth_ai.tracing_v3.session_tracer import SessionTracer
|
|
@@ -120,6 +121,7 @@ class RolloutRequest(BaseModel):
|
|
|
120
121
|
# Optional run/session context
|
|
121
122
|
training_session_id: str | None = None
|
|
122
123
|
synth_base_url: str | None = None
|
|
124
|
+
mode: RolloutMode # Required: explicit RL vs EVAL mode
|
|
123
125
|
|
|
124
126
|
|
|
125
127
|
class RolloutStep(BaseModel):
|
|
@@ -1238,6 +1240,15 @@ async def execute_rollout(
|
|
|
1238
1240
|
)
|
|
1239
1241
|
|
|
1240
1242
|
# Build partial trajectory and return HTTP 200
|
|
1243
|
+
# Extract inference_url from policy meta (best effort)
|
|
1244
|
+
inference_url = None
|
|
1245
|
+
if policy_handle is not None:
|
|
1246
|
+
try:
|
|
1247
|
+
policy_snapshot = policy_handle.snapshot()
|
|
1248
|
+
inference_url = policy_snapshot.get("config", {}).get("inference_url")
|
|
1249
|
+
except Exception:
|
|
1250
|
+
pass
|
|
1251
|
+
|
|
1241
1252
|
trajectory = RolloutTrajectory(
|
|
1242
1253
|
env_id=env_id,
|
|
1243
1254
|
policy_id=policy_id,
|
|
@@ -1249,6 +1260,7 @@ async def execute_rollout(
|
|
|
1249
1260
|
"at_op": op,
|
|
1250
1261
|
},
|
|
1251
1262
|
length=len(trajectory_steps),
|
|
1263
|
+
inference_url=inference_url, # NEW: Required for trace correlation
|
|
1252
1264
|
decision_samples=decision_samples if step_rewards_active else None,
|
|
1253
1265
|
)
|
|
1254
1266
|
metrics = RolloutMetrics(
|
|
@@ -1369,6 +1381,15 @@ async def execute_rollout(
|
|
|
1369
1381
|
},
|
|
1370
1382
|
)
|
|
1371
1383
|
trajectory_steps.append(term_step)
|
|
1384
|
+
# Extract inference_url from policy meta (best effort)
|
|
1385
|
+
inference_url = None
|
|
1386
|
+
if policy_handle is not None:
|
|
1387
|
+
try:
|
|
1388
|
+
policy_snapshot = policy_handle.snapshot()
|
|
1389
|
+
inference_url = policy_snapshot.get("config", {}).get("inference_url")
|
|
1390
|
+
except Exception:
|
|
1391
|
+
pass
|
|
1392
|
+
|
|
1372
1393
|
trajectory = RolloutTrajectory(
|
|
1373
1394
|
env_id=env_id,
|
|
1374
1395
|
policy_id=policy_id,
|
|
@@ -1379,6 +1400,7 @@ async def execute_rollout(
|
|
|
1379
1400
|
"at_op": op,
|
|
1380
1401
|
},
|
|
1381
1402
|
length=len(trajectory_steps),
|
|
1403
|
+
inference_url=inference_url, # NEW: Required for trace correlation
|
|
1382
1404
|
decision_samples=decision_samples if step_rewards_active else None,
|
|
1383
1405
|
)
|
|
1384
1406
|
metrics = RolloutMetrics(
|
|
@@ -1460,6 +1482,15 @@ async def execute_rollout(
|
|
|
1460
1482
|
)
|
|
1461
1483
|
trajectory_steps.append(term_step)
|
|
1462
1484
|
# Build partial response
|
|
1485
|
+
# Extract inference_url from policy meta (best effort)
|
|
1486
|
+
inference_url = None
|
|
1487
|
+
if policy_handle is not None:
|
|
1488
|
+
try:
|
|
1489
|
+
policy_snapshot = policy_handle.snapshot()
|
|
1490
|
+
inference_url = policy_snapshot.get("config", {}).get("inference_url")
|
|
1491
|
+
except Exception:
|
|
1492
|
+
pass
|
|
1493
|
+
|
|
1463
1494
|
trajectory = RolloutTrajectory(
|
|
1464
1495
|
env_id=env_id,
|
|
1465
1496
|
policy_id=policy_id,
|
|
@@ -1471,6 +1502,7 @@ async def execute_rollout(
|
|
|
1471
1502
|
"at_op": op,
|
|
1472
1503
|
},
|
|
1473
1504
|
length=len(trajectory_steps),
|
|
1505
|
+
inference_url=inference_url, # NEW: Required for trace correlation
|
|
1474
1506
|
decision_samples=decision_samples if step_rewards_active else None,
|
|
1475
1507
|
)
|
|
1476
1508
|
metrics = RolloutMetrics(
|
|
@@ -1688,12 +1720,22 @@ async def execute_rollout(
|
|
|
1688
1720
|
timing_final.setdefault("overhead_ms", 0.0)
|
|
1689
1721
|
|
|
1690
1722
|
# Build trajectory
|
|
1723
|
+
# Extract inference_url from policy meta
|
|
1724
|
+
inference_url = None
|
|
1725
|
+
if policy_handle is not None:
|
|
1726
|
+
try:
|
|
1727
|
+
policy_snapshot = policy_handle.snapshot()
|
|
1728
|
+
inference_url = policy_snapshot.get("config", {}).get("inference_url")
|
|
1729
|
+
except Exception:
|
|
1730
|
+
pass
|
|
1731
|
+
|
|
1691
1732
|
trajectory = RolloutTrajectory(
|
|
1692
1733
|
env_id=env_id,
|
|
1693
1734
|
policy_id=policy_id,
|
|
1694
1735
|
steps=trajectory_steps,
|
|
1695
1736
|
final={"observation": _summarize_observation_for_storage(env_handle, current_obs)},
|
|
1696
1737
|
length=len(trajectory_steps),
|
|
1738
|
+
inference_url=inference_url, # NEW: Required for trace correlation
|
|
1697
1739
|
decision_samples=decision_samples if step_rewards_active else None,
|
|
1698
1740
|
)
|
|
1699
1741
|
|
|
@@ -1,15 +1,14 @@
|
|
|
1
1
|
#!/usr/bin/env python3
|
|
2
|
-
"""
|
|
3
|
-
Simple test script for the GRPO Synth Envs Hosted Service.
|
|
4
|
-
|
|
5
|
-
Run this after starting the service with:
|
|
6
|
-
python main.py
|
|
7
|
-
"""
|
|
2
|
+
"""Manual smoke script for the GRPO Synth Envs Hosted Service."""
|
|
8
3
|
|
|
9
4
|
import asyncio
|
|
10
5
|
import json
|
|
11
6
|
|
|
12
7
|
import httpx
|
|
8
|
+
import pytest
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
pytestmark = pytest.mark.skip(reason="Requires running hosted service on localhost:8000")
|
|
13
12
|
|
|
14
13
|
|
|
15
14
|
async def test_service():
|