synth-ai 0.2.8.dev4__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/README.md +1 -0
- examples/__init__.py +16 -0
- examples/analyze_semantic_words.sh +17 -0
- examples/baseline/banking77_baseline.py +243 -0
- examples/baseline/banking77_pipeline_baseline.py +294 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
- examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/mipro/README.md +415 -0
- examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
- examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
- examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
- examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/mipro/multi_step.md +79 -0
- examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
- examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/crafter_debug_render.py +186 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
- examples/gepa/banking77_pipeline_gepa.toml +96 -0
- examples/gepa/multi_stage_gepa_example.toml +84 -0
- examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +147 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/crafter_rl_lora.md +70 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/multi_step/sse_metrics_streaming_notes.md +357 -0
- examples/multi_step/task_app_config_notes.md +494 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_small.toml +57 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +65 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +19 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +39 -0
- examples/qwen_coder/todos.md +38 -0
- examples/qwen_coder/validate_jsonl.py +60 -0
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +169 -0
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/download_dataset.py +80 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +10 -0
- examples/sdk_prompt_learning_example.py +55 -0
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +120 -0
- examples/sft/generate_traces.py +164 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +135 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +604 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +584 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1094 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1905 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +136 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +912 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/banking77_pipeline/__init__.py +6 -0
- examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
- examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/README.md +42 -0
- examples/task_apps/crafter/task_app/__init__.py +5 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +2 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +4 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +4 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +4 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +21 -0
- examples/task_apps/math/math_single_step.py +1000 -0
- examples/task_apps/math/math_task_app.py +115 -0
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README.md +356 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
- examples/task_apps/pokemon_red/task_app.py +1048 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
- examples/task_apps/sokoban/README.md +306 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +4 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +4 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +4 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +4 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/tunnel_gepa_banking77/README.md +106 -0
- examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
- examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
- examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
- examples/vlm/PROPOSAL.md +53 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +275 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +422 -0
- examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
- examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
- examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
- examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
- examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
- examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
- examples/warming_up_to_rl/export_trace_sft.py +837 -0
- examples/warming_up_to_rl/groq_test.py +97 -0
- examples/warming_up_to_rl/manage_secrets.py +131 -0
- examples/warming_up_to_rl/old/event_rewards.md +234 -0
- examples/warming_up_to_rl/old/notes.md +73 -0
- examples/warming_up_to_rl/readme.md +110 -0
- examples/warming_up_to_rl/run_eval.py +736 -0
- examples/warming_up_to_rl/run_fft_and_save.py +380 -0
- examples/warming_up_to_rl/run_local_rollout.py +239 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
- examples/warming_up_to_rl/run_rl_and_save.py +124 -0
- examples/warming_up_to_rl/run_rollout_remote.py +156 -0
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
- examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
- examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- examples/workflows/math_rl/run_eval.py +436 -0
- examples/workflows/math_rl/run_rl_and_save.py +111 -0
- synth_ai/__init__.py +47 -23
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +514 -0
- synth_ai/api/train/__init__.py +63 -0
- synth_ai/api/train/builders.py +473 -0
- synth_ai/api/train/cli.py +1185 -0
- synth_ai/api/train/config_finder.py +246 -0
- synth_ai/api/train/configs/__init__.py +65 -0
- synth_ai/api/train/configs/prompt_learning.py +496 -0
- synth_ai/api/train/configs/rl.py +188 -0
- synth_ai/api/train/configs/sft.py +99 -0
- synth_ai/api/train/configs/shared.py +81 -0
- synth_ai/api/train/env_resolver.py +352 -0
- synth_ai/api/train/pollers.py +91 -0
- synth_ai/api/train/prompt_learning.py +425 -0
- synth_ai/api/train/sft.py +390 -0
- synth_ai/api/train/supported_algos.py +147 -0
- synth_ai/api/train/task_app.py +195 -0
- synth_ai/api/train/utils.py +244 -0
- synth_ai/api/train/validators.py +1117 -0
- synth_ai/api/tunnel.py +49 -0
- synth_ai/auth/credentials.py +94 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cfgs.py +227 -0
- synth_ai/cli/__init__.py +90 -45
- synth_ai/cli/_modal_wrapper.py +31 -0
- synth_ai/cli/_storage.py +20 -0
- synth_ai/cli/_typer_patch.py +47 -0
- synth_ai/cli/_validate_task_app.py +29 -0
- synth_ai/cli/balance.py +16 -4
- synth_ai/cli/calc.py +36 -21
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +267 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +185 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1437 -0
- synth_ai/cli/commands/status/__init__.py +66 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/session.py +183 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +32 -140
- synth_ai/cli/deploy.py +233 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +28 -22
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/mcp.py +34 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +256 -0
- synth_ai/cli/recent.py +13 -7
- synth_ai/cli/rl_demo.py +166 -114
- synth_ai/cli/root.py +143 -112
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +49 -0
- synth_ai/cli/status.py +7 -125
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +3134 -0
- synth_ai/cli/traces.py +9 -5
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +13 -18
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/core/cli.py +745 -416
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/__init__.py +7 -1
- synth_ai/demos/demo_task_apps/core.py +75 -37
- synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
- synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/config.toml +55 -110
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +491 -166
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +37 -0
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +703 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +12 -5
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/environment.py +93 -2
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +60 -12
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +86 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +104 -12
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/__init__.py +15 -0
- synth_ai/evals/base.py +14 -5
- synth_ai/evals/client.py +82 -0
- synth_ai/evals/types.py +42 -0
- synth_ai/http.py +8 -22
- synth_ai/http_client.py +45 -12
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +21 -7
- synth_ai/jobs/client.py +129 -80
- synth_ai/judge_schemas.py +127 -0
- synth_ai/learning/__init__.py +51 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +122 -30
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +14 -8
- synth_ai/learning/jobs.py +43 -47
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +185 -0
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +269 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +698 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +29 -25
- synth_ai/mcp/__init__.py +5 -0
- synth_ai/mcp/__main__.py +8 -0
- synth_ai/mcp/main.py +254 -0
- synth_ai/mcp/setup.py +100 -0
- synth_ai/modal.py +257 -0
- synth_ai/pricing/__init__.py +3 -0
- synth_ai/pricing/model_pricing.py +64 -0
- synth_ai/session/__init__.py +75 -0
- synth_ai/session/client.py +383 -0
- synth_ai/session/constants.py +63 -0
- synth_ai/session/exceptions.py +105 -0
- synth_ai/session/manager.py +139 -0
- synth_ai/session/models.py +89 -0
- synth_ai/session/query.py +110 -0
- synth_ai/spec/__init__.py +46 -0
- synth_ai/spec/dataclasses.py +149 -0
- synth_ai/spec/loader.py +144 -0
- synth_ai/spec/serializer.py +199 -0
- synth_ai/spec/validation.py +250 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +589 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/__init__.py +116 -3
- synth_ai/task/apps/__init__.py +132 -0
- synth_ai/task/auth.py +165 -0
- synth_ai/task/client.py +167 -0
- synth_ai/task/config.py +261 -0
- synth_ai/task/contracts.py +173 -57
- synth_ai/task/datasets.py +108 -0
- synth_ai/task/errors.py +50 -0
- synth_ai/task/health.py +17 -11
- synth_ai/task/inference_api.py +101 -0
- synth_ai/task/json.py +111 -0
- synth_ai/task/proxy.py +251 -0
- synth_ai/task/rubrics/__init__.py +55 -0
- synth_ai/task/rubrics/loaders.py +156 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +116 -0
- synth_ai/task/rubrics/strict.py +149 -0
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/server.py +432 -0
- synth_ai/task/trace_correlation_helpers.py +328 -0
- synth_ai/task/tracing_utils.py +95 -0
- synth_ai/task/validators.py +449 -6
- synth_ai/task/vendors.py +59 -0
- synth_ai/tracing_v3/__init__.py +4 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/config.py +167 -22
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +42 -29
- synth_ai/tracing_v3/decorators.py +80 -45
- synth_ai/tracing_v3/examples/basic_usage.py +15 -9
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/replica_sync.py +12 -7
- synth_ai/tracing_v3/serialization.py +130 -0
- synth_ai/tracing_v3/session_tracer.py +86 -21
- synth_ai/tracing_v3/storage/base.py +98 -12
- synth_ai/tracing_v3/storage/config.py +63 -16
- synth_ai/tracing_v3/storage/factory.py +11 -9
- synth_ai/tracing_v3/storage/utils.py +15 -11
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/__init__.py +8 -21
- synth_ai/tracing_v3/turso/daemon.py +123 -15
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1293 -0
- synth_ai/tracing_v3/utils.py +5 -4
- synth_ai/tunnel.py +143 -0
- synth_ai/tunnel_deploy.py +278 -0
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +166 -0
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/apps.py +152 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/claude.py +36 -0
- synth_ai/utils/cli.py +284 -0
- synth_ai/utils/config.py +81 -0
- synth_ai/utils/env.py +346 -0
- synth_ai/utils/errors.py +85 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/log_filter.py +99 -0
- synth_ai/utils/logging.py +198 -0
- synth_ai/utils/modal.py +299 -0
- synth_ai/utils/paths.py +95 -0
- synth_ai/utils/process.py +233 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/ssl.py +25 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/tunnel/__init__.py +12 -0
- synth_ai/utils/tunnel/config.py +55 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/uvicorn.py +77 -0
- synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
- synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
- synth_ai/cli/man.py +0 -106
- synth_ai/core/experiment.py +0 -15
- synth_ai/core/system.py +0 -15
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/handshake.py +0 -63
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/lm/__init__.py +0 -51
- synth_ai/lm/caching/constants.py +0 -6
- synth_ai/lm/caching/dbs.py +0 -0
- synth_ai/lm/caching/ephemeral.py +0 -102
- synth_ai/lm/caching/handler.py +0 -137
- synth_ai/lm/caching/initialize.py +0 -11
- synth_ai/lm/caching/persistent.py +0 -114
- synth_ai/lm/config.py +0 -110
- synth_ai/lm/constants.py +0 -32
- synth_ai/lm/core/__init__.py +0 -8
- synth_ai/lm/core/all.py +0 -73
- synth_ai/lm/core/exceptions.py +0 -7
- synth_ai/lm/core/main.py +0 -319
- synth_ai/lm/core/main_v3.py +0 -594
- synth_ai/lm/core/synth_models.py +0 -48
- synth_ai/lm/core/vendor_clients.py +0 -188
- synth_ai/lm/cost/monitor.py +0 -1
- synth_ai/lm/cost/statefulness.py +0 -1
- synth_ai/lm/injection.py +0 -80
- synth_ai/lm/overrides.py +0 -206
- synth_ai/lm/provider_support/__init__.py +0 -8
- synth_ai/lm/provider_support/anthropic.py +0 -972
- synth_ai/lm/provider_support/openai.py +0 -1139
- synth_ai/lm/provider_support/suppress_logging.py +0 -31
- synth_ai/lm/structured_outputs/handler.py +0 -440
- synth_ai/lm/structured_outputs/inject.py +0 -297
- synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
- synth_ai/lm/tools/__init__.py +0 -3
- synth_ai/lm/tools/base.py +0 -172
- synth_ai/lm/unified_interface.py +0 -202
- synth_ai/lm/vendors/base.py +0 -81
- synth_ai/lm/vendors/core/anthropic_api.py +0 -387
- synth_ai/lm/vendors/core/gemini_api.py +0 -292
- synth_ai/lm/vendors/core/mistral_api.py +0 -322
- synth_ai/lm/vendors/core/openai_api.py +0 -225
- synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
- synth_ai/lm/vendors/local/ollama.py +0 -0
- synth_ai/lm/vendors/openai_standard.py +0 -780
- synth_ai/lm/vendors/openai_standard_responses.py +0 -256
- synth_ai/lm/vendors/retries.py +0 -22
- synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
- synth_ai/lm/vendors/supported/deepseek.py +0 -69
- synth_ai/lm/vendors/supported/grok.py +0 -75
- synth_ai/lm/vendors/supported/groq.py +0 -16
- synth_ai/lm/vendors/supported/ollama.py +0 -15
- synth_ai/lm/vendors/supported/openrouter.py +0 -74
- synth_ai/lm/vendors/supported/together.py +0 -11
- synth_ai/lm/vendors/synth_client.py +0 -808
- synth_ai/lm/warmup.py +0 -186
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/manager.py +0 -760
- synth_ai/v0/tracing/abstractions.py +0 -224
- synth_ai/v0/tracing/base_client.py +0 -91
- synth_ai/v0/tracing/client_manager.py +0 -131
- synth_ai/v0/tracing/config.py +0 -142
- synth_ai/v0/tracing/context.py +0 -146
- synth_ai/v0/tracing/decorators.py +0 -682
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/v0/tracing/events/manage.py +0 -147
- synth_ai/v0/tracing/events/scope.py +0 -86
- synth_ai/v0/tracing/events/store.py +0 -228
- synth_ai/v0/tracing/immediate_client.py +0 -151
- synth_ai/v0/tracing/local.py +0 -18
- synth_ai/v0/tracing/log_client_base.py +0 -73
- synth_ai/v0/tracing/retry_queue.py +0 -186
- synth_ai/v0/tracing/trackers.py +0 -515
- synth_ai/v0/tracing/upload.py +0 -512
- synth_ai/v0/tracing/utils.py +0 -9
- synth_ai/v0/tracing_v1/__init__.py +0 -16
- synth_ai/v0/tracing_v1/abstractions.py +0 -224
- synth_ai/v0/tracing_v1/base_client.py +0 -91
- synth_ai/v0/tracing_v1/client_manager.py +0 -131
- synth_ai/v0/tracing_v1/config.py +0 -142
- synth_ai/v0/tracing_v1/context.py +0 -146
- synth_ai/v0/tracing_v1/decorators.py +0 -703
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/v0/tracing_v1/events/manage.py +0 -147
- synth_ai/v0/tracing_v1/events/scope.py +0 -86
- synth_ai/v0/tracing_v1/events/store.py +0 -228
- synth_ai/v0/tracing_v1/immediate_client.py +0 -151
- synth_ai/v0/tracing_v1/local.py +0 -18
- synth_ai/v0/tracing_v1/log_client_base.py +0 -73
- synth_ai/v0/tracing_v1/retry_queue.py +0 -186
- synth_ai/v0/tracing_v1/trackers.py +0 -515
- synth_ai/v0/tracing_v1/upload.py +0 -527
- synth_ai/v0/tracing_v1/utils.py +0 -9
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.8.dev4.dist-info/METADATA +0 -129
- synth_ai-0.2.8.dev4.dist-info/RECORD +0 -420
- {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
- {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
- {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
- {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
- {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
- {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
- {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
- {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
- /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
- /synth_ai/{learning/filtering.py → py.typed} +0 -0
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,386 @@
|
|
|
1
|
+
"""TOML validation logic for train commands (SFT and RL)."""
|
|
2
|
+
|
|
3
|
+
from collections.abc import MutableMapping
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from pydantic import ValidationError
|
|
8
|
+
from synth_ai.api.train.configs.rl import RLConfig
|
|
9
|
+
from synth_ai.api.train.configs.sft import SFTConfig
|
|
10
|
+
from synth_ai.api.train.utils import load_toml
|
|
11
|
+
|
|
12
|
+
from .errors import (
|
|
13
|
+
InvalidJudgeConfigError,
|
|
14
|
+
InvalidRLConfigError,
|
|
15
|
+
InvalidRubricConfigError,
|
|
16
|
+
InvalidSFTConfigError,
|
|
17
|
+
MissingAlgorithmError,
|
|
18
|
+
MissingComputeError,
|
|
19
|
+
MissingDatasetError,
|
|
20
|
+
MissingModelError,
|
|
21
|
+
TomlParseError,
|
|
22
|
+
UnsupportedAlgorithmError,
|
|
23
|
+
)
|
|
24
|
+
from .judge_validation import extract_and_validate_judge_rubric
|
|
25
|
+
|
|
26
|
+
__all__ = [
|
|
27
|
+
"validate_sft_config",
|
|
28
|
+
"validate_rl_config",
|
|
29
|
+
"load_and_validate_sft",
|
|
30
|
+
"load_and_validate_rl",
|
|
31
|
+
]
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def validate_sft_config(config: MutableMapping[str, Any]) -> dict[str, Any]:
|
|
35
|
+
"""Validate SFT configuration from TOML.
|
|
36
|
+
|
|
37
|
+
Args:
|
|
38
|
+
config: Raw configuration dictionary from TOML
|
|
39
|
+
|
|
40
|
+
Returns:
|
|
41
|
+
Validated configuration dictionary
|
|
42
|
+
|
|
43
|
+
Raises:
|
|
44
|
+
InvalidSFTConfigError: If validation fails
|
|
45
|
+
MissingAlgorithmError: If algorithm section is missing or invalid
|
|
46
|
+
MissingModelError: If model is not specified
|
|
47
|
+
MissingDatasetError: If dataset path is not specified
|
|
48
|
+
MissingComputeError: If compute section is missing required fields
|
|
49
|
+
"""
|
|
50
|
+
# Check for required top-level sections
|
|
51
|
+
if "algorithm" not in config or not config["algorithm"]:
|
|
52
|
+
raise MissingAlgorithmError(
|
|
53
|
+
detail="[algorithm] section is required for SFT configs"
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
if "job" not in config or not config["job"]:
|
|
57
|
+
raise InvalidSFTConfigError(
|
|
58
|
+
detail="[job] section is required for SFT configs"
|
|
59
|
+
)
|
|
60
|
+
|
|
61
|
+
job = config.get("job", {})
|
|
62
|
+
if not job.get("model"):
|
|
63
|
+
raise MissingModelError(
|
|
64
|
+
detail="[job].model is required (e.g., 'Qwen/Qwen3-4B')"
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
# Check that at least one dataset source is specified
|
|
68
|
+
if not (job.get("data") or job.get("data_path")):
|
|
69
|
+
raise MissingDatasetError(
|
|
70
|
+
detail="[job].data or [job].data_path must be specified",
|
|
71
|
+
hint="Provide path to training JSONL file"
|
|
72
|
+
)
|
|
73
|
+
|
|
74
|
+
# Validate algorithm type, method, and variety
|
|
75
|
+
algorithm = config.get("algorithm", {})
|
|
76
|
+
if algorithm.get("type") not in {"offline", None}:
|
|
77
|
+
raise UnsupportedAlgorithmError(
|
|
78
|
+
algorithm_type=algorithm.get("type", "unknown"),
|
|
79
|
+
expected="offline",
|
|
80
|
+
hint="SFT requires algorithm.type = 'offline'"
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
method = algorithm.get("method", "")
|
|
84
|
+
if method and method not in {"sft", "supervised_finetune"}:
|
|
85
|
+
raise UnsupportedAlgorithmError(
|
|
86
|
+
algorithm_type=method,
|
|
87
|
+
expected="sft or supervised_finetune",
|
|
88
|
+
hint="SFT requires algorithm.method = 'sft' or 'supervised_finetune'"
|
|
89
|
+
)
|
|
90
|
+
|
|
91
|
+
# Validate variety is present
|
|
92
|
+
if not algorithm.get("variety"):
|
|
93
|
+
raise MissingAlgorithmError(
|
|
94
|
+
detail="[algorithm].variety is required (e.g., 'fft', 'lora', 'qlora')"
|
|
95
|
+
)
|
|
96
|
+
|
|
97
|
+
# Validate compute section
|
|
98
|
+
compute = config.get("compute", {})
|
|
99
|
+
if not compute:
|
|
100
|
+
raise MissingComputeError(
|
|
101
|
+
detail="[compute] section is required",
|
|
102
|
+
hint="Specify gpu_type, gpu_count, and nodes"
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
if not compute.get("gpu_type"):
|
|
106
|
+
raise MissingComputeError(
|
|
107
|
+
detail="[compute].gpu_type is required (e.g., 'H100', 'A100')"
|
|
108
|
+
)
|
|
109
|
+
|
|
110
|
+
if not compute.get("gpu_count"):
|
|
111
|
+
raise MissingComputeError(
|
|
112
|
+
detail="[compute].gpu_count is required"
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
# Validate using Pydantic model
|
|
116
|
+
try:
|
|
117
|
+
validated = SFTConfig.from_mapping(config)
|
|
118
|
+
return validated.to_dict()
|
|
119
|
+
except ValidationError as exc:
|
|
120
|
+
errors = []
|
|
121
|
+
for error in exc.errors():
|
|
122
|
+
loc = ".".join(str(x) for x in error["loc"])
|
|
123
|
+
msg = error["msg"]
|
|
124
|
+
errors.append(f" ⢠{loc}: {msg}")
|
|
125
|
+
raise InvalidSFTConfigError(
|
|
126
|
+
detail="Pydantic validation failed:\n" + "\n".join(errors)
|
|
127
|
+
) from exc
|
|
128
|
+
|
|
129
|
+
|
|
130
|
+
def validate_rl_config(config: MutableMapping[str, Any]) -> dict[str, Any]:
|
|
131
|
+
"""Validate RL configuration from TOML.
|
|
132
|
+
|
|
133
|
+
Args:
|
|
134
|
+
config: Raw configuration dictionary from TOML
|
|
135
|
+
|
|
136
|
+
Returns:
|
|
137
|
+
Validated configuration dictionary
|
|
138
|
+
|
|
139
|
+
Raises:
|
|
140
|
+
InvalidRLConfigError: If validation fails
|
|
141
|
+
MissingAlgorithmError: If algorithm section is missing or invalid
|
|
142
|
+
MissingModelError: If model is not specified
|
|
143
|
+
MissingComputeError: If compute section is missing required fields
|
|
144
|
+
"""
|
|
145
|
+
# Check for required top-level sections
|
|
146
|
+
if "algorithm" not in config or not config["algorithm"]:
|
|
147
|
+
raise MissingAlgorithmError(
|
|
148
|
+
detail="[algorithm] section is required for RL configs"
|
|
149
|
+
)
|
|
150
|
+
|
|
151
|
+
# Check for model OR policy (policy is the new format)
|
|
152
|
+
if "policy" not in config and "model" not in config:
|
|
153
|
+
raise MissingModelError(
|
|
154
|
+
detail="[policy] or [model] section is required for RL configs"
|
|
155
|
+
)
|
|
156
|
+
|
|
157
|
+
# Validate algorithm type, method, and variety
|
|
158
|
+
algorithm = config.get("algorithm", {})
|
|
159
|
+
if algorithm.get("type") not in {"online", None}:
|
|
160
|
+
raise UnsupportedAlgorithmError(
|
|
161
|
+
algorithm_type=algorithm.get("type", "unknown"),
|
|
162
|
+
expected="online",
|
|
163
|
+
hint="RL requires algorithm.type = 'online'"
|
|
164
|
+
)
|
|
165
|
+
|
|
166
|
+
method = algorithm.get("method", "")
|
|
167
|
+
if method and method not in {"policy_gradient", "ppo", "gspo"}:
|
|
168
|
+
raise UnsupportedAlgorithmError(
|
|
169
|
+
algorithm_type=method,
|
|
170
|
+
expected="policy_gradient",
|
|
171
|
+
hint="RL requires algorithm.method = 'policy_gradient'"
|
|
172
|
+
)
|
|
173
|
+
|
|
174
|
+
# Validate variety is present
|
|
175
|
+
if not algorithm.get("variety"):
|
|
176
|
+
raise MissingAlgorithmError(
|
|
177
|
+
detail="[algorithm].variety is required (e.g., 'gspo', 'ppo')"
|
|
178
|
+
)
|
|
179
|
+
|
|
180
|
+
# Validate model/policy section
|
|
181
|
+
model = config.get("model", {})
|
|
182
|
+
policy = config.get("policy", {})
|
|
183
|
+
|
|
184
|
+
# Use policy if available, otherwise fall back to model
|
|
185
|
+
if policy:
|
|
186
|
+
if not policy.get("model_name") and not policy.get("source"):
|
|
187
|
+
raise MissingModelError(
|
|
188
|
+
detail="[policy].model_name or [policy].source must be specified",
|
|
189
|
+
hint="Provide base model (e.g., 'Qwen/Qwen3-4B') or source checkpoint"
|
|
190
|
+
)
|
|
191
|
+
|
|
192
|
+
if not policy.get("trainer_mode"):
|
|
193
|
+
raise InvalidRLConfigError(
|
|
194
|
+
detail="[policy].trainer_mode is required (e.g., 'full', 'lora')"
|
|
195
|
+
)
|
|
196
|
+
|
|
197
|
+
if not policy.get("label"):
|
|
198
|
+
raise InvalidRLConfigError(
|
|
199
|
+
detail="[policy].label is required (e.g., 'my-rl-model')",
|
|
200
|
+
hint="Provide a descriptive label for this model"
|
|
201
|
+
)
|
|
202
|
+
elif model:
|
|
203
|
+
if not model.get("base") and not model.get("source"):
|
|
204
|
+
raise MissingModelError(
|
|
205
|
+
detail="[model].base or [model].source must be specified",
|
|
206
|
+
hint="Provide base model (e.g., 'Qwen/Qwen3-4B') or source checkpoint"
|
|
207
|
+
)
|
|
208
|
+
|
|
209
|
+
if not model.get("trainer_mode"):
|
|
210
|
+
raise InvalidRLConfigError(
|
|
211
|
+
detail="[model].trainer_mode is required (e.g., 'full', 'lora')"
|
|
212
|
+
)
|
|
213
|
+
|
|
214
|
+
if not model.get("label"):
|
|
215
|
+
raise InvalidRLConfigError(
|
|
216
|
+
detail="[model].label is required (e.g., 'my-rl-model')",
|
|
217
|
+
hint="Provide a descriptive label for this model"
|
|
218
|
+
)
|
|
219
|
+
|
|
220
|
+
# Validate compute section
|
|
221
|
+
compute = config.get("compute", {})
|
|
222
|
+
if not compute:
|
|
223
|
+
raise MissingComputeError(
|
|
224
|
+
detail="[compute] section is required",
|
|
225
|
+
hint="Specify gpu_type and gpu_count"
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
if not compute.get("gpu_type"):
|
|
229
|
+
raise MissingComputeError(
|
|
230
|
+
detail="[compute].gpu_type is required (e.g., 'H100', 'A100')"
|
|
231
|
+
)
|
|
232
|
+
|
|
233
|
+
if not compute.get("gpu_count"):
|
|
234
|
+
raise MissingComputeError(
|
|
235
|
+
detail="[compute].gpu_count is required"
|
|
236
|
+
)
|
|
237
|
+
|
|
238
|
+
# Check for rollout configuration
|
|
239
|
+
rollout = config.get("rollout", {})
|
|
240
|
+
if not rollout:
|
|
241
|
+
raise InvalidRLConfigError(
|
|
242
|
+
detail="[rollout] section is required for RL configs",
|
|
243
|
+
hint="Specify env_name, policy_name, max_turns, etc."
|
|
244
|
+
)
|
|
245
|
+
|
|
246
|
+
if not rollout.get("env_name"):
|
|
247
|
+
raise InvalidRLConfigError(
|
|
248
|
+
detail="[rollout].env_name is required (e.g., 'math', 'crafter')"
|
|
249
|
+
)
|
|
250
|
+
|
|
251
|
+
if not rollout.get("policy_name"):
|
|
252
|
+
raise InvalidRLConfigError(
|
|
253
|
+
detail="[rollout].policy_name is required"
|
|
254
|
+
)
|
|
255
|
+
|
|
256
|
+
# Validate topology section (can be top-level or under compute)
|
|
257
|
+
topology = config.get("topology") or compute.get("topology", {})
|
|
258
|
+
if not topology:
|
|
259
|
+
raise InvalidRLConfigError(
|
|
260
|
+
detail="[topology] or [compute.topology] section is required",
|
|
261
|
+
hint="Specify gpus_for_vllm, gpus_for_training, etc."
|
|
262
|
+
)
|
|
263
|
+
|
|
264
|
+
# Check for training section and its required fields
|
|
265
|
+
training = config.get("training", {})
|
|
266
|
+
if training:
|
|
267
|
+
required_training_fields = {
|
|
268
|
+
"num_epochs": "number of training epochs",
|
|
269
|
+
"iterations_per_epoch": "iterations per epoch",
|
|
270
|
+
"max_turns": "maximum turns",
|
|
271
|
+
"batch_size": "batch size",
|
|
272
|
+
"group_size": "group size",
|
|
273
|
+
"learning_rate": "learning rate",
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
for field, description in required_training_fields.items():
|
|
277
|
+
if field not in training:
|
|
278
|
+
raise InvalidRLConfigError(
|
|
279
|
+
detail=f"[training].{field} is required ({description})",
|
|
280
|
+
hint=f"Add {field} to the [training] section"
|
|
281
|
+
)
|
|
282
|
+
|
|
283
|
+
# Check for evaluation section
|
|
284
|
+
evaluation = config.get("evaluation", {})
|
|
285
|
+
if evaluation:
|
|
286
|
+
required_eval_fields = {
|
|
287
|
+
"instances": "number of evaluation instances",
|
|
288
|
+
"every_n_iters": "evaluation frequency",
|
|
289
|
+
"seeds": "evaluation seeds",
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
for field, description in required_eval_fields.items():
|
|
293
|
+
if field not in evaluation:
|
|
294
|
+
raise InvalidRLConfigError(
|
|
295
|
+
detail=f"[evaluation].{field} is required ({description})",
|
|
296
|
+
hint=f"Add {field} to the [evaluation] section"
|
|
297
|
+
)
|
|
298
|
+
|
|
299
|
+
# Inject services section if not present (will be populated at runtime)
|
|
300
|
+
if "services" not in config:
|
|
301
|
+
config["services"] = {
|
|
302
|
+
"task_url": "placeholder", # Will be resolved at runtime
|
|
303
|
+
}
|
|
304
|
+
|
|
305
|
+
# Inject reference placement if not present (like builders.py does)
|
|
306
|
+
# Reference is now under compute.topology.reference_placement
|
|
307
|
+
if "compute" not in config:
|
|
308
|
+
config["compute"] = {}
|
|
309
|
+
if "topology" not in config["compute"]:
|
|
310
|
+
config["compute"]["topology"] = {}
|
|
311
|
+
if "reference_placement" not in config["compute"]["topology"]:
|
|
312
|
+
config["compute"]["topology"]["reference_placement"] = "none"
|
|
313
|
+
|
|
314
|
+
# Validate judge/rubric configuration with formalized Pydantic models
|
|
315
|
+
# This will emit deprecation warnings for dead fields and validate structure
|
|
316
|
+
try:
|
|
317
|
+
rubric_config, judge_config = extract_and_validate_judge_rubric(config)
|
|
318
|
+
# Validation passed - configs are clean and ready for use
|
|
319
|
+
# The validated Pydantic models can be used by training code if needed
|
|
320
|
+
except (InvalidJudgeConfigError, InvalidRubricConfigError) as exc:
|
|
321
|
+
raise InvalidRLConfigError(
|
|
322
|
+
detail=f"Judge/Rubric validation failed: {exc.detail}",
|
|
323
|
+
hint="Check JUDGE_RUBRIC_CLEANUP_GUIDE.md for migration help."
|
|
324
|
+
) from exc
|
|
325
|
+
|
|
326
|
+
# Validate using Pydantic model
|
|
327
|
+
try:
|
|
328
|
+
validated = RLConfig.from_mapping(config)
|
|
329
|
+
return validated.to_dict()
|
|
330
|
+
except ValidationError as exc:
|
|
331
|
+
errors = []
|
|
332
|
+
for error in exc.errors():
|
|
333
|
+
loc = ".".join(str(x) for x in error["loc"])
|
|
334
|
+
msg = error["msg"]
|
|
335
|
+
errors.append(f" ⢠{loc}: {msg}")
|
|
336
|
+
raise InvalidRLConfigError(
|
|
337
|
+
detail="Pydantic validation failed:\n" + "\n".join(errors)
|
|
338
|
+
) from exc
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def load_and_validate_sft(config_path: Path) -> dict[str, Any]:
|
|
342
|
+
"""Load and validate an SFT TOML configuration file.
|
|
343
|
+
|
|
344
|
+
Args:
|
|
345
|
+
config_path: Path to TOML configuration file
|
|
346
|
+
|
|
347
|
+
Returns:
|
|
348
|
+
Validated configuration dictionary
|
|
349
|
+
|
|
350
|
+
Raises:
|
|
351
|
+
TomlParseError: If TOML parsing fails
|
|
352
|
+
InvalidSFTConfigError: If validation fails
|
|
353
|
+
"""
|
|
354
|
+
try:
|
|
355
|
+
raw_config = load_toml(config_path)
|
|
356
|
+
except Exception as exc:
|
|
357
|
+
raise TomlParseError(
|
|
358
|
+
path=str(config_path),
|
|
359
|
+
detail=str(exc)
|
|
360
|
+
) from exc
|
|
361
|
+
|
|
362
|
+
return validate_sft_config(raw_config)
|
|
363
|
+
|
|
364
|
+
|
|
365
|
+
def load_and_validate_rl(config_path: Path) -> dict[str, Any]:
|
|
366
|
+
"""Load and validate an RL TOML configuration file.
|
|
367
|
+
|
|
368
|
+
Args:
|
|
369
|
+
config_path: Path to TOML configuration file
|
|
370
|
+
|
|
371
|
+
Returns:
|
|
372
|
+
Validated configuration dictionary
|
|
373
|
+
|
|
374
|
+
Raises:
|
|
375
|
+
TomlParseError: If TOML parsing fails
|
|
376
|
+
InvalidRLConfigError: If validation fails
|
|
377
|
+
"""
|
|
378
|
+
try:
|
|
379
|
+
raw_config = load_toml(config_path)
|
|
380
|
+
except Exception as exc:
|
|
381
|
+
raise TomlParseError(
|
|
382
|
+
path=str(config_path),
|
|
383
|
+
detail=str(exc)
|
|
384
|
+
) from exc
|
|
385
|
+
|
|
386
|
+
return validate_rl_config(raw_config)
|
synth_ai/cli/demo.py
CHANGED
|
@@ -1,144 +1,36 @@
|
|
|
1
|
-
|
|
2
|
-
"""
|
|
3
|
-
CLI: interactive launcher for example demos and forwarders for new RL demo.
|
|
4
|
-
|
|
5
|
-
- `synth-ai demo` (no subcommand) -> legacy examples/ runner (run_demo.sh picker)
|
|
6
|
-
- `synth-ai demo deploy|configure|run` -> forwards to synth_ai.demos.core.cli
|
|
7
|
-
"""
|
|
8
|
-
|
|
9
|
-
from __future__ import annotations
|
|
10
|
-
|
|
11
|
-
import os
|
|
12
|
-
import subprocess
|
|
1
|
+
import shutil
|
|
13
2
|
from pathlib import Path
|
|
14
3
|
|
|
15
4
|
import click
|
|
16
5
|
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
"""
|
|
49
|
-
if ctx.invoked_subcommand is not None:
|
|
50
|
-
return
|
|
51
|
-
|
|
52
|
-
# If explicitly asked to list legacy demos, show interactive picker
|
|
53
|
-
if list_only:
|
|
54
|
-
repo_root = Path(os.getcwd())
|
|
55
|
-
examples_dir = repo_root / "examples"
|
|
56
|
-
demos = _find_demo_scripts(examples_dir)
|
|
57
|
-
if filter_term:
|
|
58
|
-
demos = [p for p in demos if filter_term.lower() in str(p).lower()]
|
|
59
|
-
|
|
60
|
-
if not demos:
|
|
61
|
-
click.echo("No run_demo.sh scripts found under examples/.")
|
|
62
|
-
return
|
|
63
|
-
|
|
64
|
-
click.echo("Available demos:")
|
|
65
|
-
for idx, p in enumerate(demos, start=1):
|
|
66
|
-
click.echo(f" {idx}. {p.relative_to(repo_root)}")
|
|
67
|
-
click.echo("")
|
|
68
|
-
|
|
69
|
-
def _validate_choice(val: str) -> int:
|
|
70
|
-
try:
|
|
71
|
-
i = int(val)
|
|
72
|
-
except Exception as err:
|
|
73
|
-
raise click.BadParameter("Enter a number from the list") from err
|
|
74
|
-
if i < 1 or i > len(demos):
|
|
75
|
-
raise click.BadParameter(f"Choose a number between 1 and {len(demos)}")
|
|
76
|
-
return i
|
|
77
|
-
|
|
78
|
-
choice = click.prompt("Select a demo to run", value_proc=_validate_choice)
|
|
79
|
-
script = demos[choice - 1]
|
|
80
|
-
|
|
81
|
-
click.echo("")
|
|
82
|
-
click.echo(f"🚀 Running {script.relative_to(repo_root)}\n")
|
|
83
|
-
|
|
84
|
-
try:
|
|
85
|
-
subprocess.run(["bash", str(script)], check=True)
|
|
86
|
-
except subprocess.CalledProcessError as e:
|
|
87
|
-
click.echo(f"❌ Demo exited with non-zero status: {e.returncode}")
|
|
88
|
-
except KeyboardInterrupt:
|
|
89
|
-
click.echo("\n🛑 Demo interrupted by user")
|
|
90
|
-
return
|
|
91
|
-
|
|
92
|
-
# Default: forward to RL demo init behavior, optionally with --force
|
|
93
|
-
args: list[str] = ["rl_demo.init"]
|
|
94
|
-
if force:
|
|
95
|
-
args.append("--force")
|
|
96
|
-
_forward_to_new(args)
|
|
97
|
-
|
|
98
|
-
# (prepare command removed; configure now prepares baseline TOML)
|
|
99
|
-
|
|
100
|
-
# Help pyright understand dynamic Click group attributes
|
|
101
|
-
from typing import Any, cast as _cast
|
|
102
|
-
_dg = _cast(Any, demo)
|
|
103
|
-
|
|
104
|
-
@_dg.command("deploy")
|
|
105
|
-
@click.option("--local", is_flag=True, help="Run local FastAPI instead of Modal deploy")
|
|
106
|
-
@click.option("--app", type=click.Path(), default=None, help="Path to Modal app.py for uv run modal deploy")
|
|
107
|
-
@click.option("--name", type=str, default="synth-math-demo", help="Modal app name")
|
|
108
|
-
@click.option("--script", type=click.Path(), default=None, help="Path to deploy_task_app.sh (optional legacy)")
|
|
109
|
-
def demo_deploy(local: bool, app: str | None, name: str, script: str | None):
|
|
110
|
-
args: list[str] = ["rl_demo.deploy"]
|
|
111
|
-
if local:
|
|
112
|
-
args.append("--local")
|
|
113
|
-
if app:
|
|
114
|
-
args.extend(["--app", app])
|
|
115
|
-
if name:
|
|
116
|
-
args.extend(["--name", name])
|
|
117
|
-
if script:
|
|
118
|
-
args.extend(["--script", script])
|
|
119
|
-
_forward_to_new(args)
|
|
120
|
-
|
|
121
|
-
@_dg.command("configure")
|
|
122
|
-
def demo_configure():
|
|
123
|
-
_forward_to_new(["rl_demo.configure"])
|
|
124
|
-
|
|
125
|
-
@_dg.command("setup")
|
|
126
|
-
def demo_setup():
|
|
127
|
-
_forward_to_new(["rl_demo.setup"])
|
|
128
|
-
|
|
129
|
-
@_dg.command("run")
|
|
130
|
-
@click.option("--batch-size", type=int, default=None)
|
|
131
|
-
@click.option("--group-size", type=int, default=None)
|
|
132
|
-
@click.option("--model", type=str, default=None)
|
|
133
|
-
@click.option("--timeout", type=int, default=600)
|
|
134
|
-
def demo_run(batch_size: int | None, group_size: int | None, model: str | None, timeout: int):
|
|
135
|
-
args = ["rl_demo.run"]
|
|
136
|
-
if batch_size is not None:
|
|
137
|
-
args.extend(["--batch-size", str(batch_size)])
|
|
138
|
-
if group_size is not None:
|
|
139
|
-
args.extend(["--group-size", str(group_size)])
|
|
140
|
-
if model:
|
|
141
|
-
args.extend(["--model", model])
|
|
142
|
-
if timeout:
|
|
143
|
-
args.extend(["--timeout", str(timeout)])
|
|
144
|
-
_forward_to_new(args)
|
|
6
|
+
DEMO_SOURCES: dict[str, str] = {
|
|
7
|
+
"local": "crafter",
|
|
8
|
+
"modal": "math"
|
|
9
|
+
}
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
@click.command()
|
|
13
|
+
@click.option(
|
|
14
|
+
"--runtime",
|
|
15
|
+
"runtime",
|
|
16
|
+
type=click.Choice(tuple(DEMO_SOURCES.keys()), case_sensitive=False),
|
|
17
|
+
default="local",
|
|
18
|
+
show_default=True,
|
|
19
|
+
help="Select runtime to load a demo task app to your cwd. Options: local, modal"
|
|
20
|
+
)
|
|
21
|
+
def demo_cmd(runtime: str) -> None:
|
|
22
|
+
runtime_key = runtime.lower()
|
|
23
|
+
demo_name = DEMO_SOURCES[runtime_key]
|
|
24
|
+
package_root = Path(__file__).resolve().parents[1]
|
|
25
|
+
src = package_root / "demos" / demo_name
|
|
26
|
+
if not src.exists():
|
|
27
|
+
raise click.ClickException(f"Demo source directory not found: {src}")
|
|
28
|
+
|
|
29
|
+
dst = Path.cwd() / src.name
|
|
30
|
+
if dst.exists():
|
|
31
|
+
raise click.ClickException(
|
|
32
|
+
f"Destination already exists: {dst}. Remove it first if you want to re-copy."
|
|
33
|
+
)
|
|
34
|
+
|
|
35
|
+
shutil.copytree(src, dst)
|
|
36
|
+
click.echo(f"Copied {demo_name} demo to {dst}")
|