synth-ai 0.2.8.dev4__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/README.md +1 -0
- examples/__init__.py +16 -0
- examples/analyze_semantic_words.sh +17 -0
- examples/baseline/banking77_baseline.py +243 -0
- examples/baseline/banking77_pipeline_baseline.py +294 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
- examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/mipro/README.md +415 -0
- examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
- examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
- examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
- examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/mipro/multi_step.md +79 -0
- examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
- examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/crafter_debug_render.py +186 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
- examples/gepa/banking77_pipeline_gepa.toml +96 -0
- examples/gepa/multi_stage_gepa_example.toml +84 -0
- examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +147 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/crafter_rl_lora.md +70 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/multi_step/sse_metrics_streaming_notes.md +357 -0
- examples/multi_step/task_app_config_notes.md +494 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_small.toml +57 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +65 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +19 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +39 -0
- examples/qwen_coder/todos.md +38 -0
- examples/qwen_coder/validate_jsonl.py +60 -0
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +169 -0
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/download_dataset.py +80 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- examples/rl/task_app/math_single_step.py +990 -0
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +10 -0
- examples/sdk_prompt_learning_example.py +55 -0
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +120 -0
- examples/sft/generate_traces.py +164 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +135 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +604 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +584 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1094 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1905 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +136 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +912 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/banking77_pipeline/__init__.py +6 -0
- examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
- examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/README.md +42 -0
- examples/task_apps/crafter/task_app/__init__.py +5 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +2 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +4 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +4 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +4 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +21 -0
- examples/task_apps/math/math_single_step.py +1000 -0
- examples/task_apps/math/math_task_app.py +115 -0
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README.md +356 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
- examples/task_apps/pokemon_red/task_app.py +1048 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
- examples/task_apps/sokoban/README.md +306 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +4 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +4 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +4 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +4 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/tunnel_gepa_banking77/README.md +106 -0
- examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
- examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
- examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
- examples/vlm/PROPOSAL.md +53 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +275 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +422 -0
- examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
- examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
- examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
- examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
- examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
- examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
- examples/warming_up_to_rl/export_trace_sft.py +837 -0
- examples/warming_up_to_rl/groq_test.py +97 -0
- examples/warming_up_to_rl/manage_secrets.py +131 -0
- examples/warming_up_to_rl/old/event_rewards.md +234 -0
- examples/warming_up_to_rl/old/notes.md +73 -0
- examples/warming_up_to_rl/readme.md +110 -0
- examples/warming_up_to_rl/run_eval.py +736 -0
- examples/warming_up_to_rl/run_fft_and_save.py +380 -0
- examples/warming_up_to_rl/run_local_rollout.py +239 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
- examples/warming_up_to_rl/run_rl_and_save.py +124 -0
- examples/warming_up_to_rl/run_rollout_remote.py +156 -0
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
- examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
- examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- examples/workflows/math_rl/run_eval.py +436 -0
- examples/workflows/math_rl/run_rl_and_save.py +111 -0
- synth_ai/__init__.py +47 -23
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +514 -0
- synth_ai/api/train/__init__.py +63 -0
- synth_ai/api/train/builders.py +473 -0
- synth_ai/api/train/cli.py +1185 -0
- synth_ai/api/train/config_finder.py +246 -0
- synth_ai/api/train/configs/__init__.py +65 -0
- synth_ai/api/train/configs/prompt_learning.py +496 -0
- synth_ai/api/train/configs/rl.py +188 -0
- synth_ai/api/train/configs/sft.py +99 -0
- synth_ai/api/train/configs/shared.py +81 -0
- synth_ai/api/train/env_resolver.py +352 -0
- synth_ai/api/train/pollers.py +91 -0
- synth_ai/api/train/prompt_learning.py +425 -0
- synth_ai/api/train/sft.py +390 -0
- synth_ai/api/train/supported_algos.py +147 -0
- synth_ai/api/train/task_app.py +195 -0
- synth_ai/api/train/utils.py +244 -0
- synth_ai/api/train/validators.py +1117 -0
- synth_ai/api/tunnel.py +49 -0
- synth_ai/auth/credentials.py +94 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cfgs.py +227 -0
- synth_ai/cli/__init__.py +90 -45
- synth_ai/cli/_modal_wrapper.py +31 -0
- synth_ai/cli/_storage.py +20 -0
- synth_ai/cli/_typer_patch.py +47 -0
- synth_ai/cli/_validate_task_app.py +29 -0
- synth_ai/cli/balance.py +16 -4
- synth_ai/cli/calc.py +36 -21
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +267 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +185 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1437 -0
- synth_ai/cli/commands/status/__init__.py +66 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/session.py +183 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +32 -140
- synth_ai/cli/deploy.py +233 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +28 -22
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/mcp.py +34 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +256 -0
- synth_ai/cli/recent.py +13 -7
- synth_ai/cli/rl_demo.py +166 -114
- synth_ai/cli/root.py +143 -112
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +49 -0
- synth_ai/cli/status.py +7 -125
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +3134 -0
- synth_ai/cli/traces.py +9 -5
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +13 -18
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/core/cli.py +745 -416
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/__init__.py +7 -1
- synth_ai/demos/demo_task_apps/core.py +75 -37
- synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
- synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/config.toml +55 -110
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +491 -166
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +37 -0
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +703 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +12 -5
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/environment.py +93 -2
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +60 -12
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +86 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +104 -12
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/__init__.py +15 -0
- synth_ai/evals/base.py +14 -5
- synth_ai/evals/client.py +82 -0
- synth_ai/evals/types.py +42 -0
- synth_ai/http.py +8 -22
- synth_ai/http_client.py +45 -12
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +21 -7
- synth_ai/jobs/client.py +129 -80
- synth_ai/judge_schemas.py +127 -0
- synth_ai/learning/__init__.py +51 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +122 -30
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +14 -8
- synth_ai/learning/jobs.py +43 -47
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +185 -0
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +269 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +698 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +29 -25
- synth_ai/mcp/__init__.py +5 -0
- synth_ai/mcp/__main__.py +8 -0
- synth_ai/mcp/main.py +254 -0
- synth_ai/mcp/setup.py +100 -0
- synth_ai/modal.py +257 -0
- synth_ai/pricing/__init__.py +3 -0
- synth_ai/pricing/model_pricing.py +64 -0
- synth_ai/session/__init__.py +75 -0
- synth_ai/session/client.py +383 -0
- synth_ai/session/constants.py +63 -0
- synth_ai/session/exceptions.py +105 -0
- synth_ai/session/manager.py +139 -0
- synth_ai/session/models.py +89 -0
- synth_ai/session/query.py +110 -0
- synth_ai/spec/__init__.py +46 -0
- synth_ai/spec/dataclasses.py +149 -0
- synth_ai/spec/loader.py +144 -0
- synth_ai/spec/serializer.py +199 -0
- synth_ai/spec/validation.py +250 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +589 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/__init__.py +116 -3
- synth_ai/task/apps/__init__.py +132 -0
- synth_ai/task/auth.py +165 -0
- synth_ai/task/client.py +167 -0
- synth_ai/task/config.py +261 -0
- synth_ai/task/contracts.py +173 -57
- synth_ai/task/datasets.py +108 -0
- synth_ai/task/errors.py +50 -0
- synth_ai/task/health.py +17 -11
- synth_ai/task/inference_api.py +101 -0
- synth_ai/task/json.py +111 -0
- synth_ai/task/proxy.py +251 -0
- synth_ai/task/rubrics/__init__.py +55 -0
- synth_ai/task/rubrics/loaders.py +156 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +116 -0
- synth_ai/task/rubrics/strict.py +149 -0
- synth_ai/task/rubrics.py +219 -0
- synth_ai/task/server.py +432 -0
- synth_ai/task/trace_correlation_helpers.py +328 -0
- synth_ai/task/tracing_utils.py +95 -0
- synth_ai/task/validators.py +449 -6
- synth_ai/task/vendors.py +59 -0
- synth_ai/tracing_v3/__init__.py +4 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/config.py +167 -22
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +42 -29
- synth_ai/tracing_v3/decorators.py +80 -45
- synth_ai/tracing_v3/examples/basic_usage.py +15 -9
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/replica_sync.py +12 -7
- synth_ai/tracing_v3/serialization.py +130 -0
- synth_ai/tracing_v3/session_tracer.py +86 -21
- synth_ai/tracing_v3/storage/base.py +98 -12
- synth_ai/tracing_v3/storage/config.py +63 -16
- synth_ai/tracing_v3/storage/factory.py +11 -9
- synth_ai/tracing_v3/storage/utils.py +15 -11
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/__init__.py +8 -21
- synth_ai/tracing_v3/turso/daemon.py +123 -15
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1293 -0
- synth_ai/tracing_v3/utils.py +5 -4
- synth_ai/tunnel.py +143 -0
- synth_ai/tunnel_deploy.py +278 -0
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +166 -0
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/apps.py +152 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/claude.py +36 -0
- synth_ai/utils/cli.py +284 -0
- synth_ai/utils/config.py +81 -0
- synth_ai/utils/env.py +346 -0
- synth_ai/utils/errors.py +85 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/log_filter.py +99 -0
- synth_ai/utils/logging.py +198 -0
- synth_ai/utils/modal.py +299 -0
- synth_ai/utils/paths.py +95 -0
- synth_ai/utils/process.py +233 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/ssl.py +25 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/tunnel/__init__.py +12 -0
- synth_ai/utils/tunnel/config.py +55 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/uvicorn.py +77 -0
- synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
- synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
- synth_ai/cli/man.py +0 -106
- synth_ai/core/experiment.py +0 -15
- synth_ai/core/system.py +0 -15
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/handshake.py +0 -63
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/lm/__init__.py +0 -51
- synth_ai/lm/caching/constants.py +0 -6
- synth_ai/lm/caching/dbs.py +0 -0
- synth_ai/lm/caching/ephemeral.py +0 -102
- synth_ai/lm/caching/handler.py +0 -137
- synth_ai/lm/caching/initialize.py +0 -11
- synth_ai/lm/caching/persistent.py +0 -114
- synth_ai/lm/config.py +0 -110
- synth_ai/lm/constants.py +0 -32
- synth_ai/lm/core/__init__.py +0 -8
- synth_ai/lm/core/all.py +0 -73
- synth_ai/lm/core/exceptions.py +0 -7
- synth_ai/lm/core/main.py +0 -319
- synth_ai/lm/core/main_v3.py +0 -594
- synth_ai/lm/core/synth_models.py +0 -48
- synth_ai/lm/core/vendor_clients.py +0 -188
- synth_ai/lm/cost/monitor.py +0 -1
- synth_ai/lm/cost/statefulness.py +0 -1
- synth_ai/lm/injection.py +0 -80
- synth_ai/lm/overrides.py +0 -206
- synth_ai/lm/provider_support/__init__.py +0 -8
- synth_ai/lm/provider_support/anthropic.py +0 -972
- synth_ai/lm/provider_support/openai.py +0 -1139
- synth_ai/lm/provider_support/suppress_logging.py +0 -31
- synth_ai/lm/structured_outputs/handler.py +0 -440
- synth_ai/lm/structured_outputs/inject.py +0 -297
- synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
- synth_ai/lm/tools/__init__.py +0 -3
- synth_ai/lm/tools/base.py +0 -172
- synth_ai/lm/unified_interface.py +0 -202
- synth_ai/lm/vendors/base.py +0 -81
- synth_ai/lm/vendors/core/anthropic_api.py +0 -387
- synth_ai/lm/vendors/core/gemini_api.py +0 -292
- synth_ai/lm/vendors/core/mistral_api.py +0 -322
- synth_ai/lm/vendors/core/openai_api.py +0 -225
- synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
- synth_ai/lm/vendors/local/ollama.py +0 -0
- synth_ai/lm/vendors/openai_standard.py +0 -780
- synth_ai/lm/vendors/openai_standard_responses.py +0 -256
- synth_ai/lm/vendors/retries.py +0 -22
- synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
- synth_ai/lm/vendors/supported/deepseek.py +0 -69
- synth_ai/lm/vendors/supported/grok.py +0 -75
- synth_ai/lm/vendors/supported/groq.py +0 -16
- synth_ai/lm/vendors/supported/ollama.py +0 -15
- synth_ai/lm/vendors/supported/openrouter.py +0 -74
- synth_ai/lm/vendors/supported/together.py +0 -11
- synth_ai/lm/vendors/synth_client.py +0 -808
- synth_ai/lm/warmup.py +0 -186
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/manager.py +0 -760
- synth_ai/v0/tracing/abstractions.py +0 -224
- synth_ai/v0/tracing/base_client.py +0 -91
- synth_ai/v0/tracing/client_manager.py +0 -131
- synth_ai/v0/tracing/config.py +0 -142
- synth_ai/v0/tracing/context.py +0 -146
- synth_ai/v0/tracing/decorators.py +0 -682
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/v0/tracing/events/manage.py +0 -147
- synth_ai/v0/tracing/events/scope.py +0 -86
- synth_ai/v0/tracing/events/store.py +0 -228
- synth_ai/v0/tracing/immediate_client.py +0 -151
- synth_ai/v0/tracing/local.py +0 -18
- synth_ai/v0/tracing/log_client_base.py +0 -73
- synth_ai/v0/tracing/retry_queue.py +0 -186
- synth_ai/v0/tracing/trackers.py +0 -515
- synth_ai/v0/tracing/upload.py +0 -512
- synth_ai/v0/tracing/utils.py +0 -9
- synth_ai/v0/tracing_v1/__init__.py +0 -16
- synth_ai/v0/tracing_v1/abstractions.py +0 -224
- synth_ai/v0/tracing_v1/base_client.py +0 -91
- synth_ai/v0/tracing_v1/client_manager.py +0 -131
- synth_ai/v0/tracing_v1/config.py +0 -142
- synth_ai/v0/tracing_v1/context.py +0 -146
- synth_ai/v0/tracing_v1/decorators.py +0 -703
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/v0/tracing_v1/events/manage.py +0 -147
- synth_ai/v0/tracing_v1/events/scope.py +0 -86
- synth_ai/v0/tracing_v1/events/store.py +0 -228
- synth_ai/v0/tracing_v1/immediate_client.py +0 -151
- synth_ai/v0/tracing_v1/local.py +0 -18
- synth_ai/v0/tracing_v1/log_client_base.py +0 -73
- synth_ai/v0/tracing_v1/retry_queue.py +0 -186
- synth_ai/v0/tracing_v1/trackers.py +0 -515
- synth_ai/v0/tracing_v1/upload.py +0 -527
- synth_ai/v0/tracing_v1/utils.py +0 -9
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.8.dev4.dist-info/METADATA +0 -129
- synth_ai-0.2.8.dev4.dist-info/RECORD +0 -420
- {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
- {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
- {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
- {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
- {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
- {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
- {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
- {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
- /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
- /synth_ai/{learning/filtering.py → py.typed} +0 -0
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.8.dev4.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,496 @@
|
|
|
1
|
+
"""Prompt Learning configuration models for MIPRO and GEPA."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from collections.abc import Mapping
|
|
5
|
+
from enum import Enum
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from pydantic import Field, field_validator
|
|
10
|
+
|
|
11
|
+
from ..utils import load_toml
|
|
12
|
+
from .shared import ExtraModel
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class InferenceMode(str, Enum):
    """Inference modes selectable in a prompt-learning policy config.

    Members are also ``str`` instances, so they compare equal to their
    raw string value.
    """

    synth_hosted = "synth_hosted"
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class ProviderName(str, Enum):
    """Provider identifiers accepted for the policy model.

    Members are also ``str`` instances, so they compare equal to their
    raw string value.
    """

    openai = "openai"
    groq = "groq"
    google = "google"
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class PromptLearningPolicyConfig(ExtraModel):
    """Policy settings for prompt learning: model, provider and sampling options."""

    model: str
    provider: ProviderName
    # Optional - trainer provides it in rollout requests (ignored if present)
    inference_url: str | None = None
    inference_mode: InferenceMode = InferenceMode.synth_hosted
    temperature: float = 0.0
    max_completion_tokens: int = 512
    policy_name: str | None = None

    @field_validator("inference_url", mode="before")
    @classmethod
    def _strip_inference_url(cls, v: str | None) -> str | None:
        """Trim ``inference_url`` and require an http(s) scheme.

        ``None`` (and any non-string input) is handed back untouched. A string
        is stripped of surrounding whitespace and must then begin with
        ``http://`` or ``https://``.

        Raises:
            ValueError: If the stripped string is empty or lacks an
                ``http://`` / ``https://`` prefix.
        """
        if not isinstance(v, str):
            return v
        url = v.strip()
        # An empty string has no scheme prefix either, so this single check
        # rejects both blank and malformed values with the same message.
        if not url.startswith(("http://", "https://")):
            raise ValueError("inference_url must start with http:// or https://")
        return url
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class MessagePatternConfig(ExtraModel):
    """One message entry of a prompt pattern: a role, its pattern text and an order."""

    role: str
    pattern: str
    order: int = 0
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class PromptPatternConfig(ExtraModel):
    """Initial prompt pattern: optional id/name, message patterns and wildcards."""

    id: str | None = None
    name: str | None = None
    # Use a factory (as ``wildcards`` does) rather than a literal ``[]`` so
    # both container fields declare their empty default the same way.
    messages: list[MessagePatternConfig] = Field(default_factory=list)
    wildcards: dict[str, str] = Field(default_factory=dict)
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class MIPROConfig(ExtraModel):
    """Settings specific to the MIPRO optimizer.

    MIPROv2 uses meta-learning with a bootstrap phase, TPE optimization and
    mini-batch evaluation to optimize prompts with fewer evaluations than
    genetic algorithms.
    """

    num_iterations: int = 20
    num_evaluations_per_iteration: int = 5
    batch_size: int = 32
    max_concurrent: int = 20
    env_name: str = "banking77"
    env_config: dict[str, Any] | None = None
    meta_model: str = "gpt-4o-mini"
    meta_model_provider: str = "openai"
    meta_model_inference_url: str | None = None
    few_shot_score_threshold: float = 0.8
    results_file: str | None = None
    max_wall_clock_seconds: float | None = None
    max_total_tokens: int | None = None

    # --- Token and budget limits (same shape as the GEPA settings) ---
    # Total tokens across all rollouts (policy + proposer)
    max_token_limit: int | None = None
    # Maximum spend in USD
    max_spend_usd: float | None = None
    # Model used for token estimation (tiktoken)
    token_counting_model: str = "gpt-4"
    # Halt optimization if the limit is exceeded
    enforce_token_limit: bool = True

    # --- Free-form sub-sections ---
    # TPE configuration
    tpe: dict[str, Any] | None = None
    # Demo configuration
    demo: dict[str, Any] | None = None
    # Grounding configuration
    grounding: dict[str, Any] | None = None
    # Meta-update configuration
    meta_update: dict[str, Any] | None = None

    # --- System spec ---
    # Path to system spec JSON file
    spec_path: str | None = None
    # Max tokens for spec context in meta-prompt
    spec_max_tokens: int = 5000
    # Include examples from spec
    spec_include_examples: bool = True
    # Only include rules with priority >= threshold
    spec_priority_threshold: int | None = None

    # --- Seed pools ---
    # Bootstrap seeds (for few-shot examples)
    bootstrap_train_seeds: list[int] | None = None
    # Online pool (for mini-batch evaluation)
    online_pool: list[int] | None = None
    # Test pool (held-out seeds)
    test_pool: list[int] | None = None
    # Reference pool (for dataset context in meta-prompt, must not overlap with train/test)
    reference_pool: list[int] | None = None
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
# GEPA nested configs (mirroring RL structure)
|
|
125
|
+
class GEPARolloutConfig(ExtraModel):
    """Rollout settings for GEPA (mirrors the RL ``[rollout]`` section)."""

    # Total rollout budget
    budget: int | None = None
    # Maximum concurrent rollouts
    max_concurrent: int = 20
    # Minibatch size for evaluation
    minibatch_size: int = 8
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class GEPAEvaluationConfig(ExtraModel):
    """Evaluation settings for GEPA (mirrors the RL ``[evaluation]`` section)."""

    # Evaluation seeds (training set)
    seeds: list[int] | None = None
    # Validation seeds (held-out)
    validation_seeds: list[int] | None = None
    # Test pool (final evaluation)
    test_pool: list[int] | None = None
    # Pool name for validation (e.g., "validation")
    validation_pool: str | None = None
    # Top-K prompts to validate
    validation_top_k: int | None = None
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class GEPAMutationConfig(ExtraModel):
    """Mutation settings for GEPA (LLM-guided mutation)."""

    # Mutation rate
    rate: float = 0.3
    # Model for generating mutations
    llm_model: str | None = None
    # Provider for mutation LLM
    llm_provider: str = "groq"
    # Custom inference URL
    llm_inference_url: str | None = None
    # Custom mutation prompt
    prompt: str | None = None
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
class GEPAPopulationConfig(ExtraModel):
    """Population settings for GEPA (evolution parameters)."""

    # Initial population size
    initial_size: int = 20
    # Number of generations
    num_generations: int = 10
    # Children generated per generation
    children_per_generation: int = 5
    # Crossover rate
    crossover_rate: float = 0.5
    # Pareto selection pressure
    selection_pressure: float = 1.0
    # Early stopping patience
    patience_generations: int = 3
|
|
158
|
+
|
|
159
|
+
|
|
160
|
+
class GEPAArchiveConfig(ExtraModel):
    """Archive settings for GEPA (Pareto archive)."""

    # Archive size
    size: int = 64
    # Pareto set size
    pareto_set_size: int = 64
    # Pareto epsilon
    pareto_eps: float = 1e-6
    # Fraction of archive for feedback
    feedback_fraction: float = 0.5
|
|
166
|
+
|
|
167
|
+
|
|
168
|
+
class GEPATokenConfig(ExtraModel):
    """Token and budget settings for GEPA."""

    # Maximum tokens allowed in prompt
    max_limit: int | None = None
    # Model for token counting
    counting_model: str = "gpt-4"
    # Enforce token limit on patterns
    enforce_pattern_limit: bool = True
    # Maximum spend in USD
    max_spend_usd: float | None = None
|
|
174
|
+
|
|
175
|
+
|
|
176
|
+
class GEPAModuleConfig(ExtraModel):
    """Settings for one GEPA pipeline module/stage (instruction-only)."""

    module_id: str
    max_instruction_slots: int = 3
    allowed_tools: list[str] | None = None
    max_tokens: int | None = None

    @field_validator("module_id")
    @classmethod
    def _validate_module_id(cls, v: str) -> str:
        """Return ``module_id`` stripped of surrounding whitespace.

        Raises:
            ValueError: If nothing is left after stripping.
        """
        trimmed = v.strip()
        if trimmed:
            return trimmed
        raise ValueError("module_id cannot be empty")

    @field_validator("max_instruction_slots")
    @classmethod
    def _validate_slots(cls, v: int) -> int:
        """Return ``max_instruction_slots`` unchanged when it is at least 1.

        Raises:
            ValueError: If the value is below 1.
        """
        if v >= 1:
            return v
        raise ValueError("max_instruction_slots must be >= 1")
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
class GEPAConfig(ExtraModel):
|
|
200
|
+
"""GEPA-specific configuration with nested subsections."""
|
|
201
|
+
# Top-level fields (for backwards compatibility)
|
|
202
|
+
env_name: str = "banking77"
|
|
203
|
+
env_config: dict[str, Any] | None = None
|
|
204
|
+
rng_seed: int | None = None
|
|
205
|
+
proposer_type: str = "dspy" # "dspy" or "synth"
|
|
206
|
+
|
|
207
|
+
# Multi-stage pipeline support
|
|
208
|
+
modules: list[GEPAModuleConfig] | None = None
|
|
209
|
+
|
|
210
|
+
# Nested subsections (preferred, mirrors RL structure)
|
|
211
|
+
rollout: GEPARolloutConfig | None = None
|
|
212
|
+
evaluation: GEPAEvaluationConfig | None = None
|
|
213
|
+
mutation: GEPAMutationConfig | None = None
|
|
214
|
+
population: GEPAPopulationConfig | None = None
|
|
215
|
+
archive: GEPAArchiveConfig | None = None
|
|
216
|
+
token: GEPATokenConfig | None = None
|
|
217
|
+
|
|
218
|
+
# Backwards compatibility: flat fields (deprecated, prefer nested)
|
|
219
|
+
# These will be flattened from nested configs if provided
|
|
220
|
+
rollout_budget: int | None = None
|
|
221
|
+
max_concurrent_rollouts: int | None = None
|
|
222
|
+
minibatch_size: int | None = None
|
|
223
|
+
evaluation_seeds: list[int] | None = None
|
|
224
|
+
validation_seeds: list[int] | None = None
|
|
225
|
+
test_pool: list[int] | None = None
|
|
226
|
+
validation_pool: str | None = None
|
|
227
|
+
validation_top_k: int | None = None
|
|
228
|
+
mutation_rate: float | None = None
|
|
229
|
+
mutation_llm_model: str | None = None
|
|
230
|
+
mutation_llm_provider: str | None = None
|
|
231
|
+
mutation_llm_inference_url: str | None = None
|
|
232
|
+
mutation_prompt: str | None = None
|
|
233
|
+
initial_population_size: int | None = None
|
|
234
|
+
num_generations: int | None = None
|
|
235
|
+
children_per_generation: int | None = None
|
|
236
|
+
crossover_rate: float | None = None
|
|
237
|
+
selection_pressure: float | None = None
|
|
238
|
+
patience_generations: int | None = None
|
|
239
|
+
archive_size: int | None = None
|
|
240
|
+
pareto_set_size: int | None = None
|
|
241
|
+
pareto_eps: float | None = None
|
|
242
|
+
feedback_fraction: float | None = None
|
|
243
|
+
max_token_limit: int | None = None
|
|
244
|
+
token_counting_model: str | None = None
|
|
245
|
+
enforce_pattern_token_limit: bool | None = None
|
|
246
|
+
max_spend_usd: float | None = None
|
|
247
|
+
|
|
248
|
+
def _get_rollout_budget(self) -> int | None:
|
|
249
|
+
"""Get rollout budget from nested or flat structure."""
|
|
250
|
+
if self.rollout and self.rollout.budget is not None:
|
|
251
|
+
return self.rollout.budget
|
|
252
|
+
return self.rollout_budget
|
|
253
|
+
|
|
254
|
+
def _get_max_concurrent_rollouts(self) -> int:
|
|
255
|
+
"""Get max concurrent rollouts from nested or flat structure."""
|
|
256
|
+
if self.rollout and self.rollout.max_concurrent is not None:
|
|
257
|
+
return self.rollout.max_concurrent
|
|
258
|
+
return self.max_concurrent_rollouts or 20
|
|
259
|
+
|
|
260
|
+
def _get_minibatch_size(self) -> int:
|
|
261
|
+
"""Get minibatch size from nested or flat structure."""
|
|
262
|
+
if self.rollout and self.rollout.minibatch_size is not None:
|
|
263
|
+
return self.rollout.minibatch_size
|
|
264
|
+
return self.minibatch_size or 8
|
|
265
|
+
|
|
266
|
+
def _get_evaluation_seeds(self) -> list[int] | None:
|
|
267
|
+
"""Get evaluation seeds from nested or flat structure."""
|
|
268
|
+
if self.evaluation and self.evaluation.seeds is not None:
|
|
269
|
+
return self.evaluation.seeds
|
|
270
|
+
return self.evaluation_seeds
|
|
271
|
+
|
|
272
|
+
def _get_validation_seeds(self) -> list[int] | None:
|
|
273
|
+
"""Get validation seeds from nested or flat structure."""
|
|
274
|
+
if self.evaluation and self.evaluation.validation_seeds is not None:
|
|
275
|
+
return self.evaluation.validation_seeds
|
|
276
|
+
return self.validation_seeds
|
|
277
|
+
|
|
278
|
+
def _get_test_pool(self) -> list[int] | None:
|
|
279
|
+
"""Get test pool from nested or flat structure."""
|
|
280
|
+
if self.evaluation and self.evaluation.test_pool is not None:
|
|
281
|
+
return self.evaluation.test_pool
|
|
282
|
+
return self.test_pool
|
|
283
|
+
|
|
284
|
+
def _get_mutation_rate(self) -> float:
|
|
285
|
+
"""Get mutation rate from nested or flat structure."""
|
|
286
|
+
if self.mutation and self.mutation.rate is not None:
|
|
287
|
+
return self.mutation.rate
|
|
288
|
+
return self.mutation_rate or 0.3
|
|
289
|
+
|
|
290
|
+
def _get_mutation_llm_model(self) -> str | None:
|
|
291
|
+
"""Get mutation LLM model from nested or flat structure."""
|
|
292
|
+
if self.mutation and self.mutation.llm_model is not None:
|
|
293
|
+
return self.mutation.llm_model
|
|
294
|
+
return self.mutation_llm_model
|
|
295
|
+
|
|
296
|
+
def _get_mutation_llm_provider(self) -> str:
|
|
297
|
+
"""Get mutation LLM provider from nested or flat structure."""
|
|
298
|
+
if self.mutation and self.mutation.llm_provider is not None:
|
|
299
|
+
return self.mutation.llm_provider
|
|
300
|
+
return self.mutation_llm_provider or "groq"
|
|
301
|
+
|
|
302
|
+
def _get_mutation_llm_inference_url(self) -> str | None:
|
|
303
|
+
"""Get mutation LLM inference URL from nested or flat structure."""
|
|
304
|
+
if self.mutation and self.mutation.llm_inference_url is not None:
|
|
305
|
+
return self.mutation.llm_inference_url
|
|
306
|
+
return self.mutation_llm_inference_url
|
|
307
|
+
|
|
308
|
+
def _get_mutation_prompt(self) -> str | None:
|
|
309
|
+
"""Get mutation prompt from nested or flat structure."""
|
|
310
|
+
if self.mutation and self.mutation.prompt is not None:
|
|
311
|
+
return self.mutation.prompt
|
|
312
|
+
return self.mutation_prompt
|
|
313
|
+
|
|
314
|
+
def _get_initial_population_size(self) -> int:
|
|
315
|
+
"""Get initial population size from nested or flat structure."""
|
|
316
|
+
if self.population and self.population.initial_size is not None:
|
|
317
|
+
return self.population.initial_size
|
|
318
|
+
return self.initial_population_size or 20
|
|
319
|
+
|
|
320
|
+
def _get_num_generations(self) -> int:
|
|
321
|
+
"""Get num generations from nested or flat structure."""
|
|
322
|
+
if self.population and self.population.num_generations is not None:
|
|
323
|
+
return self.population.num_generations
|
|
324
|
+
return self.num_generations or 10
|
|
325
|
+
|
|
326
|
+
def _get_children_per_generation(self) -> int:
|
|
327
|
+
"""Get children per generation from nested or flat structure."""
|
|
328
|
+
if self.population and self.population.children_per_generation is not None:
|
|
329
|
+
return self.population.children_per_generation
|
|
330
|
+
return self.children_per_generation or 5
|
|
331
|
+
|
|
332
|
+
def _get_crossover_rate(self) -> float:
|
|
333
|
+
"""Get crossover rate from nested or flat structure."""
|
|
334
|
+
if self.population and self.population.crossover_rate is not None:
|
|
335
|
+
return self.population.crossover_rate
|
|
336
|
+
return self.crossover_rate or 0.5
|
|
337
|
+
|
|
338
|
+
def _get_selection_pressure(self) -> float:
|
|
339
|
+
"""Get selection pressure from nested or flat structure."""
|
|
340
|
+
if self.population and self.population.selection_pressure is not None:
|
|
341
|
+
return self.population.selection_pressure
|
|
342
|
+
return self.selection_pressure or 1.0
|
|
343
|
+
|
|
344
|
+
def _get_patience_generations(self) -> int:
|
|
345
|
+
"""Get patience generations from nested or flat structure."""
|
|
346
|
+
if self.population and self.population.patience_generations is not None:
|
|
347
|
+
return self.population.patience_generations
|
|
348
|
+
return self.patience_generations or 3
|
|
349
|
+
|
|
350
|
+
def _get_archive_size(self) -> int:
|
|
351
|
+
"""Get archive size from nested or flat structure."""
|
|
352
|
+
if self.archive and self.archive.size is not None:
|
|
353
|
+
return self.archive.size
|
|
354
|
+
return self.archive_size or 64
|
|
355
|
+
|
|
356
|
+
def _get_pareto_set_size(self) -> int:
|
|
357
|
+
"""Get pareto set size from nested or flat structure."""
|
|
358
|
+
if self.archive and self.archive.pareto_set_size is not None:
|
|
359
|
+
return self.archive.pareto_set_size
|
|
360
|
+
return self.pareto_set_size or 64
|
|
361
|
+
|
|
362
|
+
def _get_pareto_eps(self) -> float:
|
|
363
|
+
"""Get pareto eps from nested or flat structure."""
|
|
364
|
+
if self.archive and self.archive.pareto_eps is not None:
|
|
365
|
+
return self.archive.pareto_eps
|
|
366
|
+
return self.pareto_eps or 1e-6
|
|
367
|
+
|
|
368
|
+
def _get_feedback_fraction(self) -> float:
|
|
369
|
+
"""Get feedback fraction from nested or flat structure."""
|
|
370
|
+
if self.archive and self.archive.feedback_fraction is not None:
|
|
371
|
+
return self.archive.feedback_fraction
|
|
372
|
+
return self.feedback_fraction or 0.5
|
|
373
|
+
|
|
374
|
+
def _get_max_token_limit(self) -> int | None:
|
|
375
|
+
"""Get max token limit from nested or flat structure."""
|
|
376
|
+
if self.token and self.token.max_limit is not None:
|
|
377
|
+
return self.token.max_limit
|
|
378
|
+
return self.max_token_limit
|
|
379
|
+
|
|
380
|
+
def _get_token_counting_model(self) -> str:
|
|
381
|
+
"""Get token counting model from nested or flat structure."""
|
|
382
|
+
if self.token and self.token.counting_model is not None:
|
|
383
|
+
return self.token.counting_model
|
|
384
|
+
return self.token_counting_model or "gpt-4"
|
|
385
|
+
|
|
386
|
+
def _get_enforce_pattern_token_limit(self) -> bool:
|
|
387
|
+
"""Get enforce pattern token limit from nested or flat structure."""
|
|
388
|
+
if self.token and self.token.enforce_pattern_limit is not None:
|
|
389
|
+
return self.token.enforce_pattern_limit
|
|
390
|
+
return self.enforce_pattern_token_limit if self.enforce_pattern_token_limit is not None else True
|
|
391
|
+
|
|
392
|
+
def _get_max_spend_usd(self) -> float | None:
|
|
393
|
+
"""Get max spend USD from nested or flat structure."""
|
|
394
|
+
if self.token and self.token.max_spend_usd is not None:
|
|
395
|
+
return self.token.max_spend_usd
|
|
396
|
+
return self.max_spend_usd
|
|
397
|
+
|
|
398
|
+
@classmethod
def from_mapping(cls, data: Mapping[str, Any]) -> GEPAConfig:
    """Load GEPA config from dict/TOML, handling both nested and flat structures.

    Keys naming a nested section (``rollout``, ``evaluation``, ``mutation``,
    ``population``, ``archive``, ``token``, ``modules``) are validated with
    their dedicated section models; every other key is passed through as a
    flat field. The input mapping itself is not modified.

    Args:
        data: Raw GEPA configuration mapping.

    Returns:
        The validated ``GEPAConfig`` built from the merged flat and nested data.
    """
    # Section name -> model that validates it. The dict order is the order in
    # which sections are validated, so the first failing section is stable.
    section_models = {
        "rollout": GEPARolloutConfig,
        "evaluation": GEPAEvaluationConfig,
        "mutation": GEPAMutationConfig,
        "population": GEPAPopulationConfig,
        "archive": GEPAArchiveConfig,
        "token": GEPATokenConfig,
    }

    nested_data: dict[str, Any] = {}
    flat_data: dict[str, Any] = {}
    for key, value in data.items():
        if key in section_models or key == "modules":
            nested_data[key] = value
        else:
            flat_data[key] = value

    for name, model in section_models.items():
        section = nested_data.get(name)
        # An explicit None is treated as "section not provided" and left for
        # the top-level validation rather than failing inside the section
        # model. NOTE(review): assumes the section fields are optional on
        # GEPAConfig (the getters treat a falsy section as absent) - confirm.
        if section is not None:
            nested_data[name] = model.model_validate(section)

    modules_data = nested_data.get("modules")
    if isinstance(modules_data, list):
        # Only dict entries are converted; anything else is passed through as-is.
        nested_data["modules"] = [
            GEPAModuleConfig.model_validate(m) if isinstance(m, dict) else m
            for m in modules_data
        ]

    # Flat keys first, then nested sections (the two key sets are disjoint).
    return cls.model_validate({**flat_data, **nested_data})
|
|
436
|
+
|
|
437
|
+
|
|
438
|
+
class PromptLearningConfig(ExtraModel):
    """Top-level prompt learning configuration."""

    algorithm: str  # "mipro" or "gepa"
    task_app_url: str
    task_app_api_key: str | None = None
    task_app_id: str | None = None
    initial_prompt: PromptPatternConfig | None = None
    policy: PromptLearningPolicyConfig | None = None
    mipro: MIPROConfig | None = None
    gepa: GEPAConfig | None = None
    env_config: dict[str, Any] | None = None

    def to_dict(self) -> dict[str, Any]:
        """Convert config to dictionary for API payload.

        Returns:
            The dumped model (``None`` values excluded) wrapped under a
            ``"prompt_learning"`` key, unless the dump already contains
            that key, in which case it is returned unchanged.
        """
        result = self.model_dump(mode="python", exclude_none=True)
        # Ensure prompt_learning section wraps everything
        if "prompt_learning" not in result:
            result = {"prompt_learning": result}
        return result

    @classmethod
    def from_mapping(cls, data: Mapping[str, Any]) -> PromptLearningConfig:
        """Load prompt learning config from dict/TOML mapping.

        Accepts either a mapping with a ``prompt_learning`` section or a
        flat mapping whose top level is the prompt learning config. The
        input mapping is never modified.

        Args:
            data: Raw configuration mapping.

        Returns:
            The validated ``PromptLearningConfig``.
        """
        section = data.get("prompt_learning")
        # Always work on a shallow copy: the gepa entry is replaced below and
        # that write must not leak into the caller's mapping. When the section
        # is missing or empty, the top level is taken to be the config.
        pl_data = dict(section) if section else dict(data)

        # Handle gepa config specially to support nested structure
        gepa_data = pl_data.get("gepa")
        if isinstance(gepa_data, dict):
            pl_data["gepa"] = GEPAConfig.from_mapping(gepa_data)

        return cls.model_validate(pl_data)

    @classmethod
    def from_path(cls, path: Path) -> PromptLearningConfig:
        """Load prompt learning config from TOML file."""
        content = load_toml(path)
        return cls.from_mapping(content)
|
|
480
|
+
|
|
481
|
+
|
|
482
|
+
# Public names exported by this module.
__all__ = [
    # GEPA configuration models
    "GEPAConfig",
    "GEPAModuleConfig",
    "GEPARolloutConfig",
    "GEPAEvaluationConfig",
    "GEPAMutationConfig",
    "GEPAPopulationConfig",
    "GEPAArchiveConfig",
    "GEPATokenConfig",
    # MIPRO, prompt pattern and top-level models
    "MIPROConfig",
    "MessagePatternConfig",
    "PromptLearningConfig",
    "PromptLearningPolicyConfig",
    "PromptPatternConfig",
]
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from collections.abc import Mapping
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from pydantic import model_validator
|
|
8
|
+
|
|
9
|
+
from ..utils import load_toml
|
|
10
|
+
from .shared import AlgorithmConfig, ComputeConfig, ExtraModel, LoraConfig, PolicyConfig
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class RLServicesConfig(ExtraModel):
    """Service endpoints for an RL config: required ``task_url``, optional ``judge_url``."""

    task_url: str
    judge_url: str | None = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class ModelConfig(ExtraModel):
    """Model section of an RL config.

    Exactly one of ``source`` and ``base`` must be set to a truthy value;
    this is enforced after field validation.
    """

    source: str | None = None
    base: str | None = None
    trainer_mode: str
    label: str

    @model_validator(mode="after")
    def _ensure_exactly_one_source_or_base(self) -> ModelConfig:
        """Reject configs where ``source`` and ``base`` are both set or both unset."""
        has_source = bool(self.source)
        has_base = bool(self.base)
        # Equal flags mean neither or both were provided; either case is invalid.
        if has_source is has_base:
            raise ValueError("Config must set exactly one of [model].source or [model].base")
        return self
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class RolloutConfig(ExtraModel):
    """Rollout section of an RL config.

    ``env_name``, ``policy_name``, ``max_turns``, ``episodes_per_batch`` and
    ``max_concurrent_rollouts`` are required; the remaining fields are optional.
    """

    env_name: str
    policy_name: str
    env_config: dict[str, Any] | None = None
    policy_config: dict[str, Any] | None = None
    max_turns: int
    episodes_per_batch: int
    max_concurrent_rollouts: int
    batches_per_step: int | None = None
    ops: list[str] | None = None
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class WeightSyncConfig(ExtraModel):
    """Weight sync settings nested under the RL training section; every field is optional."""

    enable: bool | None = None
    targets: list[str] | None = None
    mode: str | None = None
    direct: bool | None = None
    verify_every_k: int | None = None
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
class RewardsConfig(ExtraModel):
    """Rewards configuration for RL training.

    Every field is optional and defaults to ``None``.
    """

    step_rewards_enabled: bool | None = None
    step_rewards_mode: str | None = None
    step_rewards_indicator_lambda: float | None = None
    step_rewards_beta: float | None = None
    step_rewards_strategy: str | None = None
    event_rewards_kind: str | None = None
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class RLTrainingConfig(ExtraModel):
    """Training section of an RL config.

    Carries the training hyperparameters, the deprecated flat reward fields
    and the nested ``weight_sync``, ``lora`` and ``rewards`` sections.
    """

    num_epochs: int
    iterations_per_epoch: int
    gradient_accumulation_steps: int | None = None
    max_accumulated_minibatch: int | None = None
    max_turns: int
    batch_size: int
    group_size: int
    learning_rate: float
    log_interval: int | None = None
    weight_sync_interval: int | None = None

    # DEPRECATED: flat reward fields (use rewards.* instead)
    step_rewards_enabled: bool | None = None
    step_rewards_mode: str | None = None
    step_rewards_indicator_lambda: float | None = None
    step_rewards_beta: float | None = None
    step_rewards_strategy: str | None = None
    event_rewards_kind: str | None = None

    # NEW: nested configs
    weight_sync: WeightSyncConfig | None = None
    lora: LoraConfig | None = None
    rewards: RewardsConfig | None = None
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
class EvaluationConfig(ExtraModel):
    """Evaluation section of an RL config; all three fields are required."""

    instances: int
    every_n_iters: int
    seeds: list[int]
|
|
89
|
+
|
|
90
|
+
|
|
91
|
+
class JudgeOptionsConfig(ExtraModel):
    """Options nested under the judge section; every field is optional."""

    event: bool | None = None
    outcome: bool | None = None
    provider: str | None = None
    model: str | None = None
    rubric_id: str | None = None
    rubric_overrides: dict[str, Any] | None = None
    tracks: list[str] | None = None
    weights: dict[str, float] | None = None
    max_concurrency: int | None = None
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class RubricConfig(ExtraModel):
    """Rubric configuration for reward blending.

    ``enabled`` defaults to ``False``; ``reward_blend`` is optional.
    """

    enabled: bool = False
    reward_blend: dict[str, float] | None = None  # env, event, outcome weights
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class JudgeConfig(ExtraModel):
    """Judge section of an RL config; every field is optional."""

    type: str | None = None
    timeout_s: int | None = None
    enabled: bool | None = None  # Master switch for judge/rubric
    reward_blend: dict[str, float] | None = None  # NEW: nested reward blending (replaces rubric.weights)
    rubric: RubricConfig | None = None  # DEPRECATED: use flat fields instead
    options: JudgeOptionsConfig | None = None
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
class SmokeConfig(ExtraModel):
    """Configuration for local smoke testing (CLI only, ignored by trainer).

    Every field is optional and defaults to ``None``.
    """

    # --- Test parameters ---
    task_url: str | None = None
    env_name: str | None = None
    policy_name: str | None = None
    max_steps: int | None = None
    policy: str | None = None  # mock, gpt-5-nano, openai, groq
    model: str | None = None
    mock_backend: str | None = None  # synthetic or openai
    mock_port: int | None = None
    return_trace: bool | None = None
    use_mock: bool | None = None

    # --- Task app auto-start configuration ---
    task_app_name: str | None = None  # Task app to serve (e.g., "grpo-crafter")
    task_app_port: int | None = None  # Port for task app (default: 8765)
    task_app_env_file: str | None = None  # Path to .env file for task app
    task_app_force: bool | None = None  # Use --force flag when serving

    # --- sqld auto-start configuration ---
    sqld_auto_start: bool | None = None  # Auto-start sqld server
    sqld_db_path: str | None = None  # Database path (default: ./traces/local.db)
    sqld_hrana_port: int | None = None  # Hrana WebSocket port (default: 8080)
    sqld_http_port: int | None = None  # HTTP API port (default: 8081)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
class RLConfig(ExtraModel):
    """Top-level RL configuration.

    ``algorithm`` and ``services`` are required; every other section is
    optional. Several fields are kept only for backward compatibility and
    are marked DEPRECATED below.
    """

    algorithm: AlgorithmConfig
    services: RLServicesConfig
    compute: ComputeConfig | None = None
    topology: dict[str, Any] | None = None  # DEPRECATED: use compute.topology instead
    vllm: dict[str, Any] | None = None
    reference: dict[str, Any] | None = None  # DEPRECATED: use compute.topology.reference_placement instead
    model: ModelConfig | None = None  # DEPRECATED: use policy instead
    policy: PolicyConfig | None = None  # NEW: unified policy (preferred)
    lora: dict[str, Any] | None = None  # DEPRECATED: use training.lora instead
    rollout: RolloutConfig | None = None
    evaluation: EvaluationConfig | None = None
    training: RLTrainingConfig | None = None
    rubric: dict[str, Any] | None = None  # DEPRECATED: use judge.reward_blend and judge.enabled instead
    judge: JudgeConfig | None = None
    tags: dict[str, Any] | None = None
    smoke: SmokeConfig | None = None  # CLI-only: local smoke testing config (ignored by trainer)

    def to_dict(self) -> dict[str, Any]:
        """Dump the config to a plain dict, leaving out fields that are ``None``."""
        dumped = self.model_dump(mode="python", exclude_none=True)
        return dumped

    @classmethod
    def from_mapping(cls, data: Mapping[str, Any]) -> RLConfig:
        """Load RL config from dict/TOML mapping."""
        return cls.model_validate(data)

    @classmethod
    def from_path(cls, path: Path) -> RLConfig:
        """Load RL config from the TOML file at ``path``."""
        return cls.from_mapping(load_toml(path))
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
# Public names exported by this module (kept in alphabetical order).
# RewardsConfig and RubricConfig are included because they are the declared
# types of RLTrainingConfig.rewards and JudgeConfig.rubric respectively.
__all__ = [
    "EvaluationConfig",
    "JudgeConfig",
    "JudgeOptionsConfig",
    "ModelConfig",
    "RLConfig",
    "RLServicesConfig",
    "RLTrainingConfig",
    "RewardsConfig",
    "RolloutConfig",
    "RubricConfig",
    "SmokeConfig",
    "WeightSyncConfig",
]
|