synth-ai 0.2.9.dev0__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- examples/README.md +1 -0
- examples/__init__.py +16 -0
- examples/analyze_semantic_words.sh +17 -0
- examples/baseline/banking77_baseline.py +243 -0
- examples/baseline/banking77_pipeline_baseline.py +294 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
- examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/mipro/README.md +415 -0
- examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
- examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
- examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
- examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/mipro/multi_step.md +79 -0
- examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
- examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/crafter_debug_render.py +186 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
- examples/gepa/banking77_pipeline_gepa.toml +96 -0
- examples/gepa/multi_stage_gepa_example.toml +84 -0
- examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +147 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/crafter_rl_lora.md +70 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/multi_step/sse_metrics_streaming_notes.md +357 -0
- examples/multi_step/task_app_config_notes.md +494 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_small.toml +57 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +65 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +19 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +39 -0
- examples/qwen_coder/todos.md +38 -0
- examples/qwen_coder/validate_jsonl.py +60 -0
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +169 -0
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/download_dataset.py +80 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +188 -50
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +10 -0
- examples/sdk_prompt_learning_example.py +55 -0
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +120 -0
- examples/sft/generate_traces.py +164 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +135 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +604 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +584 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1094 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1905 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +136 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +912 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/banking77_pipeline/__init__.py +6 -0
- examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
- examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/README.md +42 -0
- examples/task_apps/crafter/task_app/__init__.py +5 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +2 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +4 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +4 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +4 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +21 -0
- examples/task_apps/math/math_single_step.py +1000 -0
- examples/task_apps/math/math_task_app.py +115 -0
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README.md +356 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
- examples/task_apps/pokemon_red/task_app.py +1048 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
- examples/task_apps/sokoban/README.md +306 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +4 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +4 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +4 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +4 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/tunnel_gepa_banking77/README.md +106 -0
- examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
- examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
- examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
- examples/vlm/PROPOSAL.md +53 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +275 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +422 -0
- examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
- examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
- examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
- examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
- examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
- examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
- examples/warming_up_to_rl/export_trace_sft.py +837 -0
- examples/warming_up_to_rl/groq_test.py +97 -0
- examples/warming_up_to_rl/manage_secrets.py +131 -0
- examples/warming_up_to_rl/old/event_rewards.md +234 -0
- examples/warming_up_to_rl/old/notes.md +73 -0
- examples/warming_up_to_rl/readme.md +110 -0
- examples/warming_up_to_rl/run_eval.py +736 -0
- examples/warming_up_to_rl/run_fft_and_save.py +380 -0
- examples/warming_up_to_rl/run_local_rollout.py +239 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
- examples/warming_up_to_rl/run_rl_and_save.py +124 -0
- examples/warming_up_to_rl/run_rollout_remote.py +156 -0
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
- examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
- examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- examples/workflows/math_rl/run_eval.py +436 -0
- examples/workflows/math_rl/run_rl_and_save.py +111 -0
- synth_ai/__init__.py +47 -23
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +514 -0
- synth_ai/api/train/__init__.py +60 -2
- synth_ai/api/train/builders.py +347 -39
- synth_ai/api/train/cli.py +895 -160
- synth_ai/api/train/config_finder.py +103 -25
- synth_ai/api/train/configs/__init__.py +65 -0
- synth_ai/api/train/configs/prompt_learning.py +496 -0
- synth_ai/api/train/configs/rl.py +188 -0
- synth_ai/api/train/configs/sft.py +99 -0
- synth_ai/api/train/configs/shared.py +81 -0
- synth_ai/api/train/env_resolver.py +70 -20
- synth_ai/api/train/pollers.py +29 -4
- synth_ai/api/train/prompt_learning.py +425 -0
- synth_ai/api/train/sft.py +390 -0
- synth_ai/api/train/supported_algos.py +147 -0
- synth_ai/api/train/task_app.py +6 -4
- synth_ai/api/train/utils.py +64 -52
- synth_ai/api/train/validators.py +1117 -0
- synth_ai/api/tunnel.py +49 -0
- synth_ai/auth/credentials.py +94 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cfgs.py +227 -0
- synth_ai/cli/__init__.py +85 -63
- synth_ai/cli/_modal_wrapper.py +31 -0
- synth_ai/cli/_storage.py +20 -0
- synth_ai/cli/_typer_patch.py +47 -0
- synth_ai/cli/_validate_task_app.py +29 -0
- synth_ai/cli/balance.py +16 -4
- synth_ai/cli/calc.py +36 -21
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +267 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +185 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1437 -0
- synth_ai/cli/commands/status/__init__.py +66 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/session.py +183 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +32 -140
- synth_ai/cli/deploy.py +233 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +28 -22
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/mcp.py +34 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +256 -0
- synth_ai/cli/recent.py +13 -7
- synth_ai/cli/rl_demo.py +156 -116
- synth_ai/cli/root.py +131 -132
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +49 -0
- synth_ai/cli/status.py +7 -125
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +2284 -257
- synth_ai/cli/traces.py +9 -5
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +13 -18
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/core/cli.py +579 -291
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/__init__.py +3 -3
- synth_ai/demos/demo_task_apps/core.py +64 -28
- synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
- synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +185 -83
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +703 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +12 -5
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/environment.py +93 -2
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +60 -12
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +86 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +104 -12
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/__init__.py +15 -0
- synth_ai/evals/base.py +14 -5
- synth_ai/evals/client.py +82 -0
- synth_ai/evals/types.py +42 -0
- synth_ai/http.py +8 -22
- synth_ai/http_client.py +45 -12
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +21 -7
- synth_ai/jobs/client.py +129 -80
- synth_ai/judge_schemas.py +127 -0
- synth_ai/learning/__init__.py +51 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +122 -30
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +14 -8
- synth_ai/learning/jobs.py +43 -47
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +185 -0
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +269 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +698 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +29 -25
- synth_ai/mcp/__init__.py +5 -0
- synth_ai/mcp/__main__.py +8 -0
- synth_ai/mcp/main.py +254 -0
- synth_ai/mcp/setup.py +100 -0
- synth_ai/modal.py +257 -0
- synth_ai/pricing/__init__.py +3 -0
- synth_ai/pricing/model_pricing.py +64 -0
- synth_ai/session/__init__.py +75 -0
- synth_ai/session/client.py +383 -0
- synth_ai/session/constants.py +63 -0
- synth_ai/session/exceptions.py +105 -0
- synth_ai/session/manager.py +139 -0
- synth_ai/session/models.py +89 -0
- synth_ai/session/query.py +110 -0
- synth_ai/spec/__init__.py +46 -0
- synth_ai/spec/dataclasses.py +149 -0
- synth_ai/spec/loader.py +144 -0
- synth_ai/spec/serializer.py +199 -0
- synth_ai/spec/validation.py +250 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +589 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/__init__.py +50 -30
- synth_ai/task/apps/__init__.py +63 -19
- synth_ai/task/auth.py +35 -23
- synth_ai/task/client.py +15 -13
- synth_ai/task/config.py +261 -0
- synth_ai/task/contracts.py +165 -64
- synth_ai/task/datasets.py +9 -6
- synth_ai/task/errors.py +11 -10
- synth_ai/task/health.py +17 -11
- synth_ai/task/inference_api.py +101 -0
- synth_ai/task/json.py +58 -24
- synth_ai/task/proxy.py +59 -66
- synth_ai/task/rubrics/__init__.py +55 -0
- synth_ai/task/rubrics/loaders.py +156 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +116 -0
- synth_ai/task/rubrics/strict.py +149 -0
- synth_ai/task/rubrics.py +22 -15
- synth_ai/task/server.py +65 -31
- synth_ai/task/trace_correlation_helpers.py +328 -0
- synth_ai/task/tracing_utils.py +44 -28
- synth_ai/task/validators.py +449 -6
- synth_ai/task/vendors.py +5 -7
- synth_ai/tracing_v3/__init__.py +4 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/config.py +167 -22
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +42 -29
- synth_ai/tracing_v3/decorators.py +80 -45
- synth_ai/tracing_v3/examples/basic_usage.py +15 -9
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/replica_sync.py +12 -7
- synth_ai/tracing_v3/serialization.py +130 -0
- synth_ai/tracing_v3/session_tracer.py +73 -16
- synth_ai/tracing_v3/storage/base.py +89 -1
- synth_ai/tracing_v3/storage/config.py +63 -16
- synth_ai/tracing_v3/storage/factory.py +11 -9
- synth_ai/tracing_v3/storage/utils.py +15 -11
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/__init__.py +8 -21
- synth_ai/tracing_v3/turso/daemon.py +123 -15
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1293 -0
- synth_ai/tracing_v3/utils.py +5 -4
- synth_ai/tunnel.py +143 -0
- synth_ai/tunnel_deploy.py +278 -0
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +166 -0
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/apps.py +152 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/claude.py +36 -0
- synth_ai/utils/cli.py +284 -0
- synth_ai/utils/config.py +81 -0
- synth_ai/utils/env.py +346 -0
- synth_ai/utils/errors.py +85 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/log_filter.py +99 -0
- synth_ai/utils/logging.py +198 -0
- synth_ai/utils/modal.py +299 -0
- synth_ai/utils/paths.py +95 -0
- synth_ai/utils/process.py +233 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/ssl.py +25 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/tunnel/__init__.py +12 -0
- synth_ai/utils/tunnel/config.py +55 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/uvicorn.py +77 -0
- synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
- synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
- synth_ai/cli/man.py +0 -106
- synth_ai/core/experiment.py +0 -15
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -258
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/handshake.py +0 -107
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/lm/__init__.py +0 -51
- synth_ai/lm/caching/constants.py +0 -6
- synth_ai/lm/caching/dbs.py +0 -0
- synth_ai/lm/caching/ephemeral.py +0 -102
- synth_ai/lm/caching/handler.py +0 -137
- synth_ai/lm/caching/initialize.py +0 -11
- synth_ai/lm/caching/persistent.py +0 -114
- synth_ai/lm/config.py +0 -110
- synth_ai/lm/constants.py +0 -32
- synth_ai/lm/core/__init__.py +0 -8
- synth_ai/lm/core/all.py +0 -73
- synth_ai/lm/core/exceptions.py +0 -7
- synth_ai/lm/core/main.py +0 -319
- synth_ai/lm/core/main_v3.py +0 -594
- synth_ai/lm/core/synth_models.py +0 -48
- synth_ai/lm/core/vendor_clients.py +0 -188
- synth_ai/lm/cost/monitor.py +0 -1
- synth_ai/lm/cost/statefulness.py +0 -1
- synth_ai/lm/injection.py +0 -80
- synth_ai/lm/overrides.py +0 -206
- synth_ai/lm/provider_support/__init__.py +0 -8
- synth_ai/lm/provider_support/anthropic.py +0 -972
- synth_ai/lm/provider_support/openai.py +0 -1139
- synth_ai/lm/provider_support/suppress_logging.py +0 -31
- synth_ai/lm/structured_outputs/handler.py +0 -440
- synth_ai/lm/structured_outputs/inject.py +0 -297
- synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
- synth_ai/lm/tools/__init__.py +0 -3
- synth_ai/lm/tools/base.py +0 -172
- synth_ai/lm/unified_interface.py +0 -202
- synth_ai/lm/vendors/base.py +0 -81
- synth_ai/lm/vendors/core/anthropic_api.py +0 -387
- synth_ai/lm/vendors/core/gemini_api.py +0 -292
- synth_ai/lm/vendors/core/mistral_api.py +0 -322
- synth_ai/lm/vendors/core/openai_api.py +0 -225
- synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
- synth_ai/lm/vendors/local/ollama.py +0 -0
- synth_ai/lm/vendors/openai_standard.py +0 -780
- synth_ai/lm/vendors/openai_standard_responses.py +0 -256
- synth_ai/lm/vendors/retries.py +0 -22
- synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
- synth_ai/lm/vendors/supported/deepseek.py +0 -69
- synth_ai/lm/vendors/supported/grok.py +0 -75
- synth_ai/lm/vendors/supported/groq.py +0 -16
- synth_ai/lm/vendors/supported/ollama.py +0 -15
- synth_ai/lm/vendors/supported/openrouter.py +0 -74
- synth_ai/lm/vendors/supported/together.py +0 -11
- synth_ai/lm/vendors/synth_client.py +0 -808
- synth_ai/lm/warmup.py +0 -186
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/task/apps/grpo_crafter.py +0 -438
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/manager.py +0 -774
- synth_ai/v0/tracing/abstractions.py +0 -224
- synth_ai/v0/tracing/base_client.py +0 -91
- synth_ai/v0/tracing/client_manager.py +0 -131
- synth_ai/v0/tracing/config.py +0 -142
- synth_ai/v0/tracing/context.py +0 -146
- synth_ai/v0/tracing/decorators.py +0 -682
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/v0/tracing/events/manage.py +0 -147
- synth_ai/v0/tracing/events/scope.py +0 -86
- synth_ai/v0/tracing/events/store.py +0 -228
- synth_ai/v0/tracing/immediate_client.py +0 -151
- synth_ai/v0/tracing/local.py +0 -18
- synth_ai/v0/tracing/log_client_base.py +0 -73
- synth_ai/v0/tracing/retry_queue.py +0 -186
- synth_ai/v0/tracing/trackers.py +0 -515
- synth_ai/v0/tracing/upload.py +0 -512
- synth_ai/v0/tracing/utils.py +0 -9
- synth_ai/v0/tracing_v1/__init__.py +0 -16
- synth_ai/v0/tracing_v1/abstractions.py +0 -224
- synth_ai/v0/tracing_v1/base_client.py +0 -91
- synth_ai/v0/tracing_v1/client_manager.py +0 -131
- synth_ai/v0/tracing_v1/config.py +0 -142
- synth_ai/v0/tracing_v1/context.py +0 -146
- synth_ai/v0/tracing_v1/decorators.py +0 -703
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/v0/tracing_v1/events/manage.py +0 -147
- synth_ai/v0/tracing_v1/events/scope.py +0 -86
- synth_ai/v0/tracing_v1/events/store.py +0 -228
- synth_ai/v0/tracing_v1/immediate_client.py +0 -151
- synth_ai/v0/tracing_v1/local.py +0 -18
- synth_ai/v0/tracing_v1/log_client_base.py +0 -73
- synth_ai/v0/tracing_v1/retry_queue.py +0 -186
- synth_ai/v0/tracing_v1/trackers.py +0 -515
- synth_ai/v0/tracing_v1/upload.py +0 -527
- synth_ai/v0/tracing_v1/utils.py +0 -9
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.9.dev0.dist-info/METADATA +0 -131
- synth_ai-0.2.9.dev0.dist-info/RECORD +0 -444
- {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
- {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
- {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
- {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
- {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
- {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
- {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
- {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
- /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
- /synth_ai/{learning/filtering.py → py.typed} +0 -0
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
# Smoke Testing Your Task App
|
|
2
|
+
|
|
3
|
+
This guide shows how to quickly test your task app using the `synth-ai smoke` command with auto-start features.
|
|
4
|
+
|
|
5
|
+
## Quick Start
|
|
6
|
+
|
|
7
|
+
The easiest way to smoke test is using the `[smoke]` section in your RL config:
|
|
8
|
+
|
|
9
|
+
```bash
|
|
10
|
+
cd examples/blog_posts/warming_up_to_rl
|
|
11
|
+
uv run synth-ai smoke --config configs/smoke_test.toml
|
|
12
|
+
```
|
|
13
|
+
|
|
14
|
+
**That's it!** The smoke command will:
|
|
15
|
+
1. ✅ Auto-start sqld server for tracing (if `sqld_auto_start = true`)
|
|
16
|
+
2. ✅ Auto-start your task app on port 8765 (if `task_app_name` is set)
|
|
17
|
+
3. ✅ Run 10 rollout steps with `gpt-5-nano` using synthetic mocking
|
|
18
|
+
4. ✅ Automatically stop all background services when done
|
|
19
|
+
|
|
20
|
+
**Expected output:**
|
|
21
|
+
```
|
|
22
|
+
[smoke] sqld ready
|
|
23
|
+
[smoke] Task app ready at http://localhost:8765 (status=400)
|
|
24
|
+
[mock-rl] server ready http://127.0.0.1:51798 backend=synthetic
|
|
25
|
+
>> POST /rollout run_id=smoke-... env=crafter policy=crafter-react
|
|
26
|
+
[mock-rl] ← request backend=synthetic model=gpt-5-nano messages=2
|
|
27
|
+
[mock-rl] → response tool_calls=1 backend=synthetic
|
|
28
|
+
rollout[0:0] episodes=1 steps=10 mean_return=1.0000
|
|
29
|
+
✓ Smoke rollouts complete
|
|
30
|
+
successes=1/1 total_steps=10 v3_traces=1/1 nonzero_returns=1/1
|
|
31
|
+
[smoke] Background services stopped
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Configuration
|
|
35
|
+
|
|
36
|
+
Add a `[smoke]` section to your RL config:
|
|
37
|
+
|
|
38
|
+
```toml
|
|
39
|
+
[smoke]
|
|
40
|
+
# Auto-start task app
|
|
41
|
+
task_app_name = "grpo-crafter"
|
|
42
|
+
task_app_port = 8765
|
|
43
|
+
task_app_env_file = ".env"
|
|
44
|
+
task_app_force = true
|
|
45
|
+
|
|
46
|
+
# Auto-start sqld
|
|
47
|
+
sqld_auto_start = true
|
|
48
|
+
sqld_db_path = "./traces/local.db"
|
|
49
|
+
sqld_hrana_port = 8080
|
|
50
|
+
sqld_http_port = 8081
|
|
51
|
+
|
|
52
|
+
# Test parameters
|
|
53
|
+
max_steps = 10
|
|
54
|
+
policy = "gpt-5-nano"
|
|
55
|
+
mock_backend = "synthetic" # or "openai" (requires valid OpenAI API key)
|
|
56
|
+
return_trace = true
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Testing Methods
|
|
60
|
+
|
|
61
|
+
### 1. Full Auto (Recommended)
|
|
62
|
+
Everything auto-starts from config:
|
|
63
|
+
```bash
|
|
64
|
+
uv run synth-ai smoke --config configs/smoke_test.toml
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### 2. Manual Task App + Auto sqld
|
|
68
|
+
Start task app manually, auto-start sqld:
|
|
69
|
+
```bash
|
|
70
|
+
# Config with sqld_auto_start=true but no task_app_name
|
|
71
|
+
uv run synth-ai smoke --config configs/my_config.toml --url http://localhost:8765
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### 3. Override Config Settings
|
|
75
|
+
Override any config value via CLI:
|
|
76
|
+
```bash
|
|
77
|
+
uv run synth-ai smoke --config configs/smoke_test.toml --max-steps 5
|
|
78
|
+
```
|
|
79
|
+
|
|
80
|
+
### 4. No Config (Manual Everything)
|
|
81
|
+
```bash
|
|
82
|
+
# Start services manually in separate terminals:
|
|
83
|
+
# Terminal 1: sqld --db-path ./traces/local.db --hrana-listen-addr 127.0.0.1:8080 --http-listen-addr 127.0.0.1:8081
|
|
84
|
+
# Terminal 2: uv run synth-ai task-app serve grpo-crafter --port 8765 --env-file .env --force
|
|
85
|
+
|
|
86
|
+
# Terminal 3: Run smoke test
|
|
87
|
+
uv run synth-ai smoke --url http://localhost:8765 \
|
|
88
|
+
--env-name crafter \
|
|
89
|
+
--policy-name crafter-react \
|
|
90
|
+
--max-steps 10 \
|
|
91
|
+
--policy mock \
|
|
92
|
+
--mock-backend openai
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
## Prerequisites
|
|
96
|
+
|
|
97
|
+
### Install sqld (for tracing)
|
|
98
|
+
```bash
|
|
99
|
+
brew install sqld
|
|
100
|
+
# or
|
|
101
|
+
curl -fsSL https://get.turso.com/sqld | bash
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Verify Installation
|
|
105
|
+
```bash
|
|
106
|
+
which sqld
|
|
107
|
+
# Should output: /opt/homebrew/bin/sqld or similar
|
|
108
|
+
```
|
|
109
|
+
|
|
110
|
+
## Common Issues
|
|
111
|
+
|
|
112
|
+
### sqld not found
|
|
113
|
+
If you see "sqld not found in PATH", install sqld:
|
|
114
|
+
```bash
|
|
115
|
+
brew install sqld
|
|
116
|
+
```
|
|
117
|
+
|
|
118
|
+
### Port already in use
|
|
119
|
+
Set `task_app_force = true` in your config, or free the ports manually:
|
|
120
|
+
```bash
|
|
121
|
+
# Kill processes on ports 8080, 8081, 8765
|
|
122
|
+
lsof -ti:8080,8081,8765 | xargs kill -9
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
### Task app not starting
|
|
126
|
+
Check the error output - you may need:
|
|
127
|
+
- Valid `.env` file with required keys
|
|
128
|
+
- Correct task app name registered in your codebase
|
|
129
|
+
|
|
130
|
+
## Example Output
|
|
131
|
+
|
|
132
|
+
```
|
|
133
|
+
[smoke] Loaded configuration from configs/smoke_test.toml
|
|
134
|
+
[smoke] Config keys: task_app_name, task_app_port, sqld_auto_start, max_steps, policy
|
|
135
|
+
[smoke] Starting sqld server...
|
|
136
|
+
[smoke] DB path: /Users/you/project/traces/local.db
|
|
137
|
+
[smoke] Hrana port: 8080, HTTP port: 8081
|
|
138
|
+
[smoke] sqld ready
|
|
139
|
+
[smoke] Starting task app 'grpo-crafter' on port 8765...
|
|
140
|
+
[smoke] Task app ready at http://localhost:8765
|
|
141
|
+
[smoke] Task app started, will use URL: http://localhost:8765
|
|
142
|
+
[mock-rl] server ready http://127.0.0.1:52134 backend=openai
|
|
143
|
+
>> POST /rollout run_id=smoke-abc123...
|
|
144
|
+
rollout[0:0] episodes=1 steps=20 mean_return=1.2500
|
|
145
|
+
✓ Smoke rollouts complete
|
|
146
|
+
successes=1/1 total_steps=20 v3_traces=1/1 nonzero_returns=1/1
|
|
147
|
+
[smoke] Stopping sqld...
|
|
148
|
+
[smoke] Stopping task_app...
|
|
149
|
+
[smoke] Background services stopped
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## Next Steps
|
|
153
|
+
|
|
154
|
+
Once smoke tests pass:
|
|
155
|
+
1. Train your model: `uv run synth-ai train --type rl --config configs/your_config.toml`
|
|
156
|
+
2. Check traces: look in the `./traces/` directory
|
|
157
|
+
3. Monitor training: Use the Synth dashboard
|
|
158
|
+
|
|
159
|
+
## Full Config Reference
|
|
160
|
+
|
|
161
|
+
See [`configs/smoke_test.toml`](configs/smoke_test.toml) for a complete example.
|
|
162
|
+
|
|
163
|
+
See [CLI Smoke Documentation](https://docs.usesynth.ai/cli/smoke) for all options.
|
|
164
|
+
|
|
@@ -0,0 +1,253 @@
|
|
|
1
|
+
# Smoke Test Implementation - Complete
|
|
2
|
+
|
|
3
|
+
## Summary
|
|
4
|
+
|
|
5
|
+
The smoke test now provides **complete visibility into RL training rollouts**, including:
|
|
6
|
+
|
|
7
|
+
✅ **Auto-start background services** (sqld, task app)
|
|
8
|
+
✅ **Real OpenAI inference** with gpt-4o-mini
|
|
9
|
+
✅ **Tool call display** - see every action the policy takes
|
|
10
|
+
✅ **Trace validation** - verify v3 trace format
|
|
11
|
+
✅ **Clean output** - all diagnostic noise suppressed
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
```bash
|
|
16
|
+
cd examples/blog_posts/warming_up_to_rl
|
|
17
|
+
uv run synth-ai smoke --config configs/smoke_test.toml
|
|
18
|
+
```
|
|
19
|
+
|
|
20
|
+
**Output shows:**
|
|
21
|
+
- Service startup (sqld, task app)
|
|
22
|
+
- Real-time inference requests
|
|
23
|
+
- **All 10 tool calls with arguments** (e.g., `interact_many({"actions":["move_up","move_up"]})`)
|
|
24
|
+
- Rollout metrics (steps, returns, rewards)
|
|
25
|
+
- Success validation
|
|
26
|
+
|
|
27
|
+
## Documentation
|
|
28
|
+
|
|
29
|
+
All documentation has been updated for future agents:
|
|
30
|
+
|
|
31
|
+
### 1. User Documentation
|
|
32
|
+
- **`SMOKE_TESTING.md`** - How to run smoke tests, what to expect
|
|
33
|
+
- **`configs/smoke_test.toml`** - Well-commented example configuration
|
|
34
|
+
- **`monorepo/docs/cli/smoke.mdx`** - Mintlify CLI documentation
|
|
35
|
+
|
|
36
|
+
### 2. Developer Documentation
|
|
37
|
+
- **`ARCHITECTURE.md`** - Internal architecture, troubleshooting guide
|
|
38
|
+
- **`synth_ai/cli/commands/smoke/core.py`** - Extensive inline comments explaining tool call extraction
|
|
39
|
+
|
|
40
|
+
### 3. Code Comments
|
|
41
|
+
|
|
42
|
+
**Tool Call Extraction (core.py lines 946-997):**
|
|
43
|
+
```python
|
|
44
|
+
# Extract and display tool calls from v3 trace
|
|
45
|
+
#
|
|
46
|
+
# IMPORTANT: Tool calls are extracted from the structured v3 trace format.
|
|
47
|
+
# The trace must be requested with return_trace=True for this to work.
|
|
48
|
+
#
|
|
49
|
+
# Trace structure:
|
|
50
|
+
# trace.event_history[] - list of events (policy calls, env steps)
|
|
51
|
+
# ├─ event.call_records[] - LLM calls made during this event
|
|
52
|
+
# ├─ call_record.output_tool_calls[] - tool calls from LLM response
|
|
53
|
+
# ├─ tool_call.name - function name (e.g., "interact_many")
|
|
54
|
+
# └─ tool_call.arguments_json - JSON string of arguments
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
## Key Implementation Details
|
|
58
|
+
|
|
59
|
+
### Tool Call Display
|
|
60
|
+
|
|
61
|
+
**Requirements:**
|
|
62
|
+
1. `return_trace = true` in config (CRITICAL - without this, no tool calls)
|
|
63
|
+
2. v3 trace format (`trace_format="structured"`)
|
|
64
|
+
3. Inference routed through the mock proxy (synthetic or OpenAI backend) - direct API calls made with `--no-mock` don't populate traces correctly
|
|
65
|
+
|
|
66
|
+
**Data Flow:**
|
|
67
|
+
```
|
|
68
|
+
1. Rollout request with return_trace=True
|
|
69
|
+
↓
|
|
70
|
+
2. Task app makes LLM calls, captures responses
|
|
71
|
+
↓
|
|
72
|
+
3. LLM responses include tool_calls
|
|
73
|
+
↓
|
|
74
|
+
4. Task app stores call_records in event_history
|
|
75
|
+
↓
|
|
76
|
+
5. Smoke command extracts from trace.event_history[].call_records[].output_tool_calls[]
|
|
77
|
+
↓
|
|
78
|
+
6. Display: TOOL_CALL[N]: function_name({...args})
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Diagnostic Suppression
|
|
82
|
+
|
|
83
|
+
**Permanently disabled (commented out, not deleted):**
|
|
84
|
+
- `synth_ai/tracing_v3/config.py:21` - `[TRACING_V3_CONFIG_LOADED]`
|
|
85
|
+
- `synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py` - All `[PATCH]` messages
|
|
86
|
+
- `synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py` - All `[PATCH]` messages
|
|
87
|
+
- `synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py` - All `[PATCH]` messages
|
|
88
|
+
|
|
89
|
+
**Why commented, not deleted?**
|
|
90
|
+
- Preserves context for debugging
|
|
91
|
+
- Shows what messages existed
|
|
92
|
+
- Easy to re-enable if needed
|
|
93
|
+
|
|
94
|
+
### Background Service Management
|
|
95
|
+
|
|
96
|
+
**Task App:**
|
|
97
|
+
- Runs from synth-ai root (required for discovery)
|
|
98
|
+
- Uses `nohup` for detachment
|
|
99
|
+
- Output → `nohup_task_app.out`
|
|
100
|
+
- Health check accepts 200 or 400 (400 = server up, auth failing)
|
|
101
|
+
- 120s timeout with progress updates
|
|
102
|
+
|
|
103
|
+
**sqld:**
|
|
104
|
+
- Dual ports: 8080 (Hrana WebSocket), 8081 (HTTP)
|
|
105
|
+
- Health check: `GET http://127.0.0.1:8081/health`
|
|
106
|
+
- 30s timeout
|
|
107
|
+
- Auto-cleanup of existing processes
|
|
108
|
+
|
|
109
|
+
## Configuration Reference
|
|
110
|
+
|
|
111
|
+
### Critical Settings
|
|
112
|
+
|
|
113
|
+
```toml
|
|
114
|
+
[smoke]
|
|
115
|
+
# Auto-start services
|
|
116
|
+
task_app_name = "grpo-crafter" # Task app to serve
|
|
117
|
+
task_app_port = 8765
|
|
118
|
+
task_app_env_file = ".env" # Required for this app
|
|
119
|
+
sqld_auto_start = true
|
|
120
|
+
|
|
121
|
+
# Inference - REAL OpenAI
|
|
122
|
+
model = "gpt-4o-mini" # Actual model used
|
|
123
|
+
mock_backend = "openai" # Route through OpenAI API
|
|
124
|
+
use_mock = true # Enable mock proxy
|
|
125
|
+
|
|
126
|
+
# CRITICAL for tool call display
|
|
127
|
+
return_trace = true # Must be true!
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
### Optional Settings
|
|
131
|
+
|
|
132
|
+
All `[smoke]` parameters are optional - CLI args override TOML values:
|
|
133
|
+
|
|
134
|
+
```bash
|
|
135
|
+
# Override max steps
|
|
136
|
+
uv run synth-ai smoke --config configs/smoke_test.toml --max-steps 5
|
|
137
|
+
|
|
138
|
+
# Use different model
|
|
139
|
+
uv run synth-ai smoke --config configs/smoke_test.toml --model gpt-4o
|
|
140
|
+
|
|
141
|
+
# Disable mock (use direct API - won't show tool calls properly)
|
|
142
|
+
uv run synth-ai smoke --config configs/smoke_test.toml --no-mock
|
|
143
|
+
```
|
|
144
|
+
|
|
145
|
+
## Troubleshooting
|
|
146
|
+
|
|
147
|
+
### No tool calls displayed
|
|
148
|
+
|
|
149
|
+
**Symptom:** `⚠ No tool calls found in trace`
|
|
150
|
+
|
|
151
|
+
**Solutions:**
|
|
152
|
+
1. Verify `return_trace = true` in config
|
|
153
|
+
2. Check `v3_traces=1/1` in output (should match successes)
|
|
154
|
+
3. Ensure `use_mock = true` is set so that requests go through the mock proxy
|
|
155
|
+
4. Check task app logs: `cat /path/to/synth-ai/nohup_task_app.out`
|
|
156
|
+
|
|
157
|
+
### Task app exits immediately
|
|
158
|
+
|
|
159
|
+
**Symptom:** `0 steps`, process not running
|
|
160
|
+
|
|
161
|
+
**Solutions:**
|
|
162
|
+
1. Verify task app name: `synth-ai task-app list`
|
|
163
|
+
2. Check .env file exists at `task_app_env_file` path
|
|
164
|
+
3. Ensure you are running from the correct directory
|
|
165
|
+
4. Manual test: `cd /path/to/synth-ai && uvx synth-ai task-app serve grpo-crafter --port 8765 --env-file /path/to/.env --force`
|
|
166
|
+
|
|
167
|
+
### Port conflicts
|
|
168
|
+
|
|
169
|
+
**Symptom:** `Address already in use`
|
|
170
|
+
|
|
171
|
+
**Solution:** Auto-cleanup should handle this; if it does not, clean up manually:
|
|
172
|
+
```bash
|
|
173
|
+
lsof -ti :8080 | xargs kill -9
|
|
174
|
+
lsof -ti :8081 | xargs kill -9
|
|
175
|
+
lsof -ti :8765 | xargs kill -9
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
## Testing
|
|
179
|
+
|
|
180
|
+
### Unit Tests
|
|
181
|
+
|
|
182
|
+
- `tests/unit/test_train_validation.py::test_rl_config_with_smoke_section` - Validates `[smoke]` section parsing
|
|
183
|
+
- `tests/unit/test_smoke_config.py` - Comprehensive Pydantic validation tests
|
|
184
|
+
|
|
185
|
+
### Integration Test
|
|
186
|
+
|
|
187
|
+
```bash
|
|
188
|
+
cd examples/blog_posts/warming_up_to_rl
|
|
189
|
+
uv run synth-ai smoke --config configs/smoke_test.toml
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
**Expected result:**
|
|
193
|
+
- ✅ Services start successfully
|
|
194
|
+
- ✅ 10 tool calls displayed
|
|
195
|
+
- ✅ `v3_traces=1/1`
|
|
196
|
+
- ✅ `successes=1/1`
|
|
197
|
+
- ✅ `nonzero_returns=1/1`
|
|
198
|
+
|
|
199
|
+
## Files Modified
|
|
200
|
+
|
|
201
|
+
### Core Implementation
|
|
202
|
+
- `synth_ai/cli/commands/smoke/core.py` - Tool call extraction, auto-start logic
|
|
203
|
+
- `synth_ai/api/train/configs/rl.py` - `SmokeConfig` Pydantic model
|
|
204
|
+
- `synth_ai/api/train/builders.py` - Remove `[smoke]` before sending to trainer
|
|
205
|
+
|
|
206
|
+
### Diagnostic Suppression
|
|
207
|
+
- `synth_ai/tracing_v3/config.py` - Commented out `[TRACING_V3_CONFIG_LOADED]`
|
|
208
|
+
- `synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py` - Commented out `[PATCH]`
|
|
209
|
+
- `synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py` - Commented out `[PATCH]`
|
|
210
|
+
- `synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py` - Commented out `[PATCH]`
|
|
211
|
+
|
|
212
|
+
### Documentation
|
|
213
|
+
- `examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md` - User guide
|
|
214
|
+
- `examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md` - Developer guide
|
|
215
|
+
- `examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml` - Example config
|
|
216
|
+
- `examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml` - Inline docs
|
|
217
|
+
- `monorepo/docs/cli/smoke.mdx` - Mintlify CLI reference
|
|
218
|
+
|
|
219
|
+
### Tests
|
|
220
|
+
- `tests/unit/test_train_validation.py` - Added smoke section test
|
|
221
|
+
- `tests/unit/test_smoke_config.py` - Comprehensive smoke config tests
|
|
222
|
+
|
|
223
|
+
## Future Improvements
|
|
224
|
+
|
|
225
|
+
Ideas for future agents:
|
|
226
|
+
|
|
227
|
+
1. **Streaming display** - Show tool calls as they happen, not just at end
|
|
228
|
+
2. **Tool call validation** - Verify format matches environment expectations
|
|
229
|
+
3. **Performance metrics** - Track inference latency per call
|
|
230
|
+
4. **Cost tracking** - Display OpenAI API costs
|
|
231
|
+
5. **Parallel rollouts** - Support concurrent execution testing
|
|
232
|
+
6. **Vision support** - Save observations for vision-based tasks
|
|
233
|
+
7. **Interactive mode** - Step through rollout one action at a time
|
|
234
|
+
8. **Replay mode** - Re-run saved traces for debugging
|
|
235
|
+
|
|
236
|
+
## Success Criteria Met
|
|
237
|
+
|
|
238
|
+
✅ **Tool calls visible** - All 10 calls displayed with arguments
|
|
239
|
+
✅ **Real inference** - OpenAI gpt-4o-mini executing actual tool calls
|
|
240
|
+
✅ **Clean output** - No diagnostic noise
|
|
241
|
+
✅ **Auto-start** - Background services managed automatically
|
|
242
|
+
✅ **Well documented** - Comprehensive docs for users and developers
|
|
243
|
+
✅ **Robust** - Error handling, health checks, timeouts
|
|
244
|
+
✅ **Tested** - Unit tests and working integration test
|
|
245
|
+
|
|
246
|
+
## Contact
|
|
247
|
+
|
|
248
|
+
For questions or issues, see:
|
|
249
|
+
- Architecture details: `ARCHITECTURE.md`
|
|
250
|
+
- User guide: `SMOKE_TESTING.md`
|
|
251
|
+
- CLI reference: `monorepo/docs/cli/smoke.mdx`
|
|
252
|
+
|
|
253
|
+
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
[eval]
|
|
2
|
+
app_id = "grpo-crafter"
|
|
3
|
+
task_app_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
|
|
4
|
+
model = "Qwen/Qwen3-4B"
|
|
5
|
+
seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
|
|
6
|
+
max_turns = 20
|
|
7
|
+
concurrency = 1
|
|
8
|
+
env_name = "crafter"
|
|
9
|
+
policy_name = "crafter-react"
|
|
10
|
+
trace_format = "structured"
|
|
11
|
+
return_trace = true
|
|
12
|
+
|
|
13
|
+
[eval.policy_config]
|
|
14
|
+
provider = "synth"
|
|
15
|
+
model = "Qwen/Qwen3-4B"
|
|
16
|
+
inference_url = "https://synth-laboratories-dev--learning-v2-service-fastapi-app.modal.run"
|
|
17
|
+
temperature = 0.6
|
|
18
|
+
top_p = 0.95
|
|
19
|
+
max_tokens = 2048
|
|
20
|
+
use_vision = false
|
|
21
|
+
image_only_mode = false
|
|
22
|
+
max_llm_calls = 10
|
|
23
|
+
|
|
24
|
+
[eval.env_config.env_params]
|
|
25
|
+
max_steps_per_episode = 20
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
# Evaluate the finetuned Qwen3-4B checkpoint on Crafter.
|
|
2
|
+
# Replace `model` (in both [eval] and [eval.policy_config]) with the `fft:` job ID returned by the SFT run.
|
|
3
|
+
|
|
4
|
+
[eval]
|
|
5
|
+
app_id = "grpo-crafter"
|
|
6
|
+
task_app_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
|
|
7
|
+
model = "fft:REPLACE-WITH-SFT-JOB-ID"
|
|
8
|
+
seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
|
|
9
|
+
max_turns = 10
|
|
10
|
+
concurrency = 4
|
|
11
|
+
env_name = "crafter"
|
|
12
|
+
policy_name = "crafter-react"
|
|
13
|
+
trace_format = "compact"
|
|
14
|
+
return_trace = false
|
|
15
|
+
|
|
16
|
+
[eval.policy_config]
|
|
17
|
+
provider = "synth"
|
|
18
|
+
model = "fft:REPLACE-WITH-SFT-JOB-ID"
|
|
19
|
+
temperature = 0.2
|
|
20
|
+
top_p = 0.8
|
|
21
|
+
max_tokens = 512
|
|
22
|
+
use_vision = true
|
|
23
|
+
image_only_mode = false
|
|
24
|
+
max_llm_calls = 10
|
|
25
|
+
tool_choice = "auto"
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
[eval]
|
|
2
|
+
app_id = "grpo-crafter"
|
|
3
|
+
task_app_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
|
|
4
|
+
model = "peft:Qwen/Qwen3-4B:job_f774218e6c954517"
|
|
5
|
+
seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
|
|
6
|
+
max_turns = 20
|
|
7
|
+
concurrency = 2
|
|
8
|
+
env_name = "crafter"
|
|
9
|
+
policy_name = "crafter-react"
|
|
10
|
+
trace_format = "structured"
|
|
11
|
+
return_trace = true
|
|
12
|
+
|
|
13
|
+
[eval.policy_config]
|
|
14
|
+
provider = "synth"
|
|
15
|
+
model = "peft:Qwen/Qwen3-4B:job_f774218e6c954517"
|
|
16
|
+
inference_url = "https://synth-laboratories-dev--learning-v2-service-fastapi-app.modal.run"
|
|
17
|
+
temperature = 0.2
|
|
18
|
+
top_p = 0.8
|
|
19
|
+
max_tokens = 1024
|
|
20
|
+
use_vision = false
|
|
21
|
+
image_only_mode = false
|
|
22
|
+
max_llm_calls = 10
|
|
23
|
+
tool_choice = "auto"
|
|
24
|
+
|
|
25
|
+
[eval.env_config.env_params]
|
|
26
|
+
max_steps_per_episode = 20
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
[eval]
|
|
2
|
+
app_id = "grpo-crafter"
|
|
3
|
+
task_app_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
|
|
4
|
+
model = "qwen/qwen3-32b"
|
|
5
|
+
seeds = [ 0, 1, 2,]
|
|
6
|
+
max_turns = 10
|
|
7
|
+
concurrency = 1
|
|
8
|
+
env_name = "crafter"
|
|
9
|
+
policy_name = "crafter-react"
|
|
10
|
+
trace_format = "full"
|
|
11
|
+
return_trace = true
|
|
12
|
+
|
|
13
|
+
[eval.policy_config]
|
|
14
|
+
provider = "groq"
|
|
15
|
+
model = "qwen/qwen3-32b"
|
|
16
|
+
inference_url = "https://api.groq.com/openai"
|
|
17
|
+
temperature = 0.6
|
|
18
|
+
top_p = 0.95
|
|
19
|
+
max_tokens = 8192
|
|
20
|
+
use_vision = false
|
|
21
|
+
image_only_mode = false
|
|
22
|
+
max_llm_calls = 10
|
|
23
|
+
|
|
24
|
+
[eval.env_config.env_params]
|
|
25
|
+
max_steps_per_episode = 10
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# Crafter rollout config for GPT-OSS-120B served from OpenAI-compatible APIs.
|
|
2
|
+
# Replace the task_app_url with your deployed Crafter task app URL.
|
|
3
|
+
# The run stores full traces so we can keep the LLM reasoning for fine-tuning.
|
|
4
|
+
|
|
5
|
+
[eval]
|
|
6
|
+
app_id = "grpo-crafter"
|
|
7
|
+
task_app_url = "https://synth-laboratories--grpo-crafter-task-app-fastapi-app-dev.modal.run"
|
|
8
|
+
model = "openai/gpt-oss-120b"
|
|
9
|
+
seeds = [0, 1, 2]
|
|
10
|
+
max_turns = 10
|
|
11
|
+
concurrency = 1
|
|
12
|
+
env_name = "crafter"
|
|
13
|
+
policy_name = "crafter-react"
|
|
14
|
+
trace_format = "full"
|
|
15
|
+
return_trace = true
|
|
16
|
+
|
|
17
|
+
[eval.env_config]
|
|
18
|
+
env_params = { max_steps_per_episode = 10 }
|
|
19
|
+
|
|
20
|
+
[eval.policy_config]
|
|
21
|
+
provider = "groq"
|
|
22
|
+
model = "openai/gpt-oss-120b"
|
|
23
|
+
inference_url = "https://api.groq.com/openai"
|
|
24
|
+
temperature = 0.6
|
|
25
|
+
top_p = 0.9
|
|
26
|
+
max_tokens = 768
|
|
27
|
+
use_vision = false
|
|
28
|
+
image_only_mode = false
|
|
29
|
+
max_llm_calls = 10
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Filters Crafter traces into an instruction-tuning dataset.
|
|
2
|
+
# Assumes you stored rollouts in traces/v3/crafter_blog.db via `uvx synth-ai eval`.
|
|
3
|
+
|
|
4
|
+
[filter]
|
|
5
|
+
db = "sqlite+libsql://http://127.0.0.1:8080"
|
|
6
|
+
output = "examples/blog_posts/warming_up_to_rl/ft_data/crafter_blog_high_reward.jsonl"
|
|
7
|
+
min_official_score = 0.1
|
|
8
|
+
models = ["qwen/qwen3-32b", "openai/gpt-oss-120b"]
|
|
9
|
+
shuffle = true
|
|
10
|
+
shuffle_seed = 42
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
# Example RL config with smoke testing enabled
|
|
2
|
+
# This config demonstrates auto-starting the task app and sqld for easy smoke testing
|
|
3
|
+
|
|
4
|
+
type = "rl"
|
|
5
|
+
|
|
6
|
+
# Smoke testing configuration - AUTO-STARTS services in background!
|
|
7
|
+
[smoke]
|
|
8
|
+
# Auto-start the task app server
|
|
9
|
+
task_app_name = "grpo-crafter" # Your task app name (use "synth-ai task-app list" to see available apps)
|
|
10
|
+
task_app_port = 8765
|
|
11
|
+
task_app_env_file = ".env" # Required for this task app
|
|
12
|
+
task_app_force = true # Kill any existing process on this port
|
|
13
|
+
|
|
14
|
+
# Auto-start sqld for tracing
|
|
15
|
+
sqld_auto_start = true
|
|
16
|
+
sqld_db_path = "./traces/local.db"
|
|
17
|
+
sqld_hrana_port = 8080
|
|
18
|
+
sqld_http_port = 8081
|
|
19
|
+
|
|
20
|
+
# Test parameters
|
|
21
|
+
env_name = "crafter"
|
|
22
|
+
policy_name = "crafter-react"
|
|
23
|
+
max_steps = 10
|
|
24
|
+
policy = "gpt-5-nano" # Use gpt-5-nano policy with mock backend
|
|
25
|
+
model = "gpt-4o-mini" # Real model to use via OpenAI
|
|
26
|
+
mock_backend = "openai" # Use OpenAI backend for real inference and tool calls
|
|
27
|
+
return_trace = true
|
|
28
|
+
use_mock = true # Use mock proxy that routes to OpenAI
|
|
29
|
+
|
|
30
|
+
# RL Training Configuration (used by actual training, not smoke tests)
|
|
31
|
+
[algorithm]
|
|
32
|
+
type = "online"
|
|
33
|
+
method = "policy_gradient"
|
|
34
|
+
variety = "gspo"
|
|
35
|
+
|
|
36
|
+
[policy]
|
|
37
|
+
model_name = "Qwen/Qwen3-4B"
|
|
38
|
+
trainer_mode = "full"
|
|
39
|
+
label = "crafter-rl-demo"
|
|
40
|
+
|
|
41
|
+
[compute]
|
|
42
|
+
gpu_type = "H100"
|
|
43
|
+
gpu_count = 2
|
|
44
|
+
|
|
45
|
+
[compute.topology]
|
|
46
|
+
type = "single_node_split"
|
|
47
|
+
gpus_for_vllm = 1
|
|
48
|
+
gpus_for_training = 1
|
|
49
|
+
|
|
50
|
+
[services]
|
|
51
|
+
task_url = "http://localhost:8765"
|
|
52
|
+
|
|
53
|
+
[rollout]
|
|
54
|
+
env_name = "crafter"
|
|
55
|
+
policy_name = "crafter-react"
|
|
56
|
+
max_turns = 10
|
|
57
|
+
episodes_per_batch = 16
|
|
58
|
+
max_concurrent_rollouts = 4
|
|
59
|
+
task_app_origin_rewards_only = true
|
|
60
|
+
|
|
61
|
+
[training]
|
|
62
|
+
num_epochs = 1
|
|
63
|
+
iterations_per_epoch = 10
|
|
64
|
+
max_turns = 10
|
|
65
|
+
batch_size = 4
|
|
66
|
+
group_size = 4
|
|
67
|
+
learning_rate = 5e-5
|
|
68
|
+
weight_sync_interval = 1
|
|
69
|
+
log_interval = 1
|
|
70
|
+
|
|
71
|
+
[evaluation]
|
|
72
|
+
instances = 2
|
|
73
|
+
every_n_iters = 1
|
|
74
|
+
seeds = [0, 1]
|
|
75
|
+
|