synth-ai 0.2.9.dev0__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/README.md +1 -0
- examples/__init__.py +16 -0
- examples/analyze_semantic_words.sh +17 -0
- examples/baseline/banking77_baseline.py +243 -0
- examples/baseline/banking77_pipeline_baseline.py +294 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
- examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/mipro/README.md +415 -0
- examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
- examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
- examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
- examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/mipro/multi_step.md +79 -0
- examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
- examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/crafter_debug_render.py +186 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
- examples/gepa/banking77_pipeline_gepa.toml +96 -0
- examples/gepa/multi_stage_gepa_example.toml +84 -0
- examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +147 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/crafter_rl_lora.md +70 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/multi_step/sse_metrics_streaming_notes.md +357 -0
- examples/multi_step/task_app_config_notes.md +494 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_small.toml +57 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +65 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +19 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +39 -0
- examples/qwen_coder/todos.md +38 -0
- examples/qwen_coder/validate_jsonl.py +60 -0
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +169 -0
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/download_dataset.py +80 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +188 -50
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +10 -0
- examples/sdk_prompt_learning_example.py +55 -0
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +120 -0
- examples/sft/generate_traces.py +164 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +135 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +604 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +584 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1094 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1905 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +136 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +912 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/banking77_pipeline/__init__.py +6 -0
- examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
- examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/README.md +42 -0
- examples/task_apps/crafter/task_app/__init__.py +5 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +2 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +4 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +4 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +4 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +21 -0
- examples/task_apps/math/math_single_step.py +1000 -0
- examples/task_apps/math/math_task_app.py +115 -0
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README.md +356 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
- examples/task_apps/pokemon_red/task_app.py +1048 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
- examples/task_apps/sokoban/README.md +306 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +4 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +4 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +4 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +4 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/tunnel_gepa_banking77/README.md +106 -0
- examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
- examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
- examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
- examples/vlm/PROPOSAL.md +53 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +275 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +422 -0
- examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
- examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
- examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
- examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
- examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
- examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
- examples/warming_up_to_rl/export_trace_sft.py +837 -0
- examples/warming_up_to_rl/groq_test.py +97 -0
- examples/warming_up_to_rl/manage_secrets.py +131 -0
- examples/warming_up_to_rl/old/event_rewards.md +234 -0
- examples/warming_up_to_rl/old/notes.md +73 -0
- examples/warming_up_to_rl/readme.md +110 -0
- examples/warming_up_to_rl/run_eval.py +736 -0
- examples/warming_up_to_rl/run_fft_and_save.py +380 -0
- examples/warming_up_to_rl/run_local_rollout.py +239 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
- examples/warming_up_to_rl/run_rl_and_save.py +124 -0
- examples/warming_up_to_rl/run_rollout_remote.py +156 -0
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
- examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
- examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- examples/workflows/math_rl/run_eval.py +436 -0
- examples/workflows/math_rl/run_rl_and_save.py +111 -0
- synth_ai/__init__.py +47 -23
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +514 -0
- synth_ai/api/train/__init__.py +60 -2
- synth_ai/api/train/builders.py +347 -39
- synth_ai/api/train/cli.py +895 -160
- synth_ai/api/train/config_finder.py +103 -25
- synth_ai/api/train/configs/__init__.py +65 -0
- synth_ai/api/train/configs/prompt_learning.py +496 -0
- synth_ai/api/train/configs/rl.py +188 -0
- synth_ai/api/train/configs/sft.py +99 -0
- synth_ai/api/train/configs/shared.py +81 -0
- synth_ai/api/train/env_resolver.py +70 -20
- synth_ai/api/train/pollers.py +29 -4
- synth_ai/api/train/prompt_learning.py +425 -0
- synth_ai/api/train/sft.py +390 -0
- synth_ai/api/train/supported_algos.py +147 -0
- synth_ai/api/train/task_app.py +6 -4
- synth_ai/api/train/utils.py +64 -52
- synth_ai/api/train/validators.py +1117 -0
- synth_ai/api/tunnel.py +49 -0
- synth_ai/auth/credentials.py +94 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cfgs.py +227 -0
- synth_ai/cli/__init__.py +85 -63
- synth_ai/cli/_modal_wrapper.py +31 -0
- synth_ai/cli/_storage.py +20 -0
- synth_ai/cli/_typer_patch.py +47 -0
- synth_ai/cli/_validate_task_app.py +29 -0
- synth_ai/cli/balance.py +16 -4
- synth_ai/cli/calc.py +36 -21
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +267 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +185 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1437 -0
- synth_ai/cli/commands/status/__init__.py +66 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/session.py +183 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +32 -140
- synth_ai/cli/deploy.py +233 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +28 -22
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/mcp.py +34 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +256 -0
- synth_ai/cli/recent.py +13 -7
- synth_ai/cli/rl_demo.py +156 -116
- synth_ai/cli/root.py +131 -132
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +49 -0
- synth_ai/cli/status.py +7 -125
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +2284 -257
- synth_ai/cli/traces.py +9 -5
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +13 -18
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/core/cli.py +579 -291
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/__init__.py +3 -3
- synth_ai/demos/demo_task_apps/core.py +64 -28
- synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
- synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +185 -83
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +703 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +12 -5
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/environment.py +93 -2
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +60 -12
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +86 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +104 -12
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/__init__.py +15 -0
- synth_ai/evals/base.py +14 -5
- synth_ai/evals/client.py +82 -0
- synth_ai/evals/types.py +42 -0
- synth_ai/http.py +8 -22
- synth_ai/http_client.py +45 -12
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +21 -7
- synth_ai/jobs/client.py +129 -80
- synth_ai/judge_schemas.py +127 -0
- synth_ai/learning/__init__.py +51 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +122 -30
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +14 -8
- synth_ai/learning/jobs.py +43 -47
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +185 -0
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +269 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +698 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +29 -25
- synth_ai/mcp/__init__.py +5 -0
- synth_ai/mcp/__main__.py +8 -0
- synth_ai/mcp/main.py +254 -0
- synth_ai/mcp/setup.py +100 -0
- synth_ai/modal.py +257 -0
- synth_ai/pricing/__init__.py +3 -0
- synth_ai/pricing/model_pricing.py +64 -0
- synth_ai/session/__init__.py +75 -0
- synth_ai/session/client.py +383 -0
- synth_ai/session/constants.py +63 -0
- synth_ai/session/exceptions.py +105 -0
- synth_ai/session/manager.py +139 -0
- synth_ai/session/models.py +89 -0
- synth_ai/session/query.py +110 -0
- synth_ai/spec/__init__.py +46 -0
- synth_ai/spec/dataclasses.py +149 -0
- synth_ai/spec/loader.py +144 -0
- synth_ai/spec/serializer.py +199 -0
- synth_ai/spec/validation.py +250 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +589 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/__init__.py +50 -30
- synth_ai/task/apps/__init__.py +63 -19
- synth_ai/task/auth.py +35 -23
- synth_ai/task/client.py +15 -13
- synth_ai/task/config.py +261 -0
- synth_ai/task/contracts.py +165 -64
- synth_ai/task/datasets.py +9 -6
- synth_ai/task/errors.py +11 -10
- synth_ai/task/health.py +17 -11
- synth_ai/task/inference_api.py +101 -0
- synth_ai/task/json.py +58 -24
- synth_ai/task/proxy.py +59 -66
- synth_ai/task/rubrics/__init__.py +55 -0
- synth_ai/task/rubrics/loaders.py +156 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +116 -0
- synth_ai/task/rubrics/strict.py +149 -0
- synth_ai/task/rubrics.py +22 -15
- synth_ai/task/server.py +65 -31
- synth_ai/task/trace_correlation_helpers.py +328 -0
- synth_ai/task/tracing_utils.py +44 -28
- synth_ai/task/validators.py +449 -6
- synth_ai/task/vendors.py +5 -7
- synth_ai/tracing_v3/__init__.py +4 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/config.py +167 -22
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +42 -29
- synth_ai/tracing_v3/decorators.py +80 -45
- synth_ai/tracing_v3/examples/basic_usage.py +15 -9
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/replica_sync.py +12 -7
- synth_ai/tracing_v3/serialization.py +130 -0
- synth_ai/tracing_v3/session_tracer.py +73 -16
- synth_ai/tracing_v3/storage/base.py +89 -1
- synth_ai/tracing_v3/storage/config.py +63 -16
- synth_ai/tracing_v3/storage/factory.py +11 -9
- synth_ai/tracing_v3/storage/utils.py +15 -11
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/__init__.py +8 -21
- synth_ai/tracing_v3/turso/daemon.py +123 -15
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1293 -0
- synth_ai/tracing_v3/utils.py +5 -4
- synth_ai/tunnel.py +143 -0
- synth_ai/tunnel_deploy.py +278 -0
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +166 -0
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/apps.py +152 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/claude.py +36 -0
- synth_ai/utils/cli.py +284 -0
- synth_ai/utils/config.py +81 -0
- synth_ai/utils/env.py +346 -0
- synth_ai/utils/errors.py +85 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/log_filter.py +99 -0
- synth_ai/utils/logging.py +198 -0
- synth_ai/utils/modal.py +299 -0
- synth_ai/utils/paths.py +95 -0
- synth_ai/utils/process.py +233 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/ssl.py +25 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/tunnel/__init__.py +12 -0
- synth_ai/utils/tunnel/config.py +55 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/uvicorn.py +77 -0
- synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
- synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
- synth_ai/cli/man.py +0 -106
- synth_ai/core/experiment.py +0 -15
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -258
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/handshake.py +0 -107
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/lm/__init__.py +0 -51
- synth_ai/lm/caching/constants.py +0 -6
- synth_ai/lm/caching/dbs.py +0 -0
- synth_ai/lm/caching/ephemeral.py +0 -102
- synth_ai/lm/caching/handler.py +0 -137
- synth_ai/lm/caching/initialize.py +0 -11
- synth_ai/lm/caching/persistent.py +0 -114
- synth_ai/lm/config.py +0 -110
- synth_ai/lm/constants.py +0 -32
- synth_ai/lm/core/__init__.py +0 -8
- synth_ai/lm/core/all.py +0 -73
- synth_ai/lm/core/exceptions.py +0 -7
- synth_ai/lm/core/main.py +0 -319
- synth_ai/lm/core/main_v3.py +0 -594
- synth_ai/lm/core/synth_models.py +0 -48
- synth_ai/lm/core/vendor_clients.py +0 -188
- synth_ai/lm/cost/monitor.py +0 -1
- synth_ai/lm/cost/statefulness.py +0 -1
- synth_ai/lm/injection.py +0 -80
- synth_ai/lm/overrides.py +0 -206
- synth_ai/lm/provider_support/__init__.py +0 -8
- synth_ai/lm/provider_support/anthropic.py +0 -972
- synth_ai/lm/provider_support/openai.py +0 -1139
- synth_ai/lm/provider_support/suppress_logging.py +0 -31
- synth_ai/lm/structured_outputs/handler.py +0 -440
- synth_ai/lm/structured_outputs/inject.py +0 -297
- synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
- synth_ai/lm/tools/__init__.py +0 -3
- synth_ai/lm/tools/base.py +0 -172
- synth_ai/lm/unified_interface.py +0 -202
- synth_ai/lm/vendors/base.py +0 -81
- synth_ai/lm/vendors/core/anthropic_api.py +0 -387
- synth_ai/lm/vendors/core/gemini_api.py +0 -292
- synth_ai/lm/vendors/core/mistral_api.py +0 -322
- synth_ai/lm/vendors/core/openai_api.py +0 -225
- synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
- synth_ai/lm/vendors/local/ollama.py +0 -0
- synth_ai/lm/vendors/openai_standard.py +0 -780
- synth_ai/lm/vendors/openai_standard_responses.py +0 -256
- synth_ai/lm/vendors/retries.py +0 -22
- synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
- synth_ai/lm/vendors/supported/deepseek.py +0 -69
- synth_ai/lm/vendors/supported/grok.py +0 -75
- synth_ai/lm/vendors/supported/groq.py +0 -16
- synth_ai/lm/vendors/supported/ollama.py +0 -15
- synth_ai/lm/vendors/supported/openrouter.py +0 -74
- synth_ai/lm/vendors/supported/together.py +0 -11
- synth_ai/lm/vendors/synth_client.py +0 -808
- synth_ai/lm/warmup.py +0 -186
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/task/apps/grpo_crafter.py +0 -438
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/manager.py +0 -774
- synth_ai/v0/tracing/abstractions.py +0 -224
- synth_ai/v0/tracing/base_client.py +0 -91
- synth_ai/v0/tracing/client_manager.py +0 -131
- synth_ai/v0/tracing/config.py +0 -142
- synth_ai/v0/tracing/context.py +0 -146
- synth_ai/v0/tracing/decorators.py +0 -682
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/v0/tracing/events/manage.py +0 -147
- synth_ai/v0/tracing/events/scope.py +0 -86
- synth_ai/v0/tracing/events/store.py +0 -228
- synth_ai/v0/tracing/immediate_client.py +0 -151
- synth_ai/v0/tracing/local.py +0 -18
- synth_ai/v0/tracing/log_client_base.py +0 -73
- synth_ai/v0/tracing/retry_queue.py +0 -186
- synth_ai/v0/tracing/trackers.py +0 -515
- synth_ai/v0/tracing/upload.py +0 -512
- synth_ai/v0/tracing/utils.py +0 -9
- synth_ai/v0/tracing_v1/__init__.py +0 -16
- synth_ai/v0/tracing_v1/abstractions.py +0 -224
- synth_ai/v0/tracing_v1/base_client.py +0 -91
- synth_ai/v0/tracing_v1/client_manager.py +0 -131
- synth_ai/v0/tracing_v1/config.py +0 -142
- synth_ai/v0/tracing_v1/context.py +0 -146
- synth_ai/v0/tracing_v1/decorators.py +0 -703
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/v0/tracing_v1/events/manage.py +0 -147
- synth_ai/v0/tracing_v1/events/scope.py +0 -86
- synth_ai/v0/tracing_v1/events/store.py +0 -228
- synth_ai/v0/tracing_v1/immediate_client.py +0 -151
- synth_ai/v0/tracing_v1/local.py +0 -18
- synth_ai/v0/tracing_v1/log_client_base.py +0 -73
- synth_ai/v0/tracing_v1/retry_queue.py +0 -186
- synth_ai/v0/tracing_v1/trackers.py +0 -515
- synth_ai/v0/tracing_v1/upload.py +0 -527
- synth_ai/v0/tracing_v1/utils.py +0 -9
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.9.dev0.dist-info/METADATA +0 -131
- synth_ai-0.2.9.dev0.dist-info/RECORD +0 -444
- {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
- {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
- {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
- {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
- {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
- {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
- {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
- {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
- /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
- /synth_ai/{learning/filtering.py → py.typed} +0 -0
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
#!/bin/bash
# Deploy Banking77 task app locally for MIPROv2 optimization
#
# Environment:
#   GROQ_API_KEY         required; the script exits with status 1 when it is empty.
#   ENVIRONMENT_API_KEY  optional; a random key is generated when unset or empty.

set -e

echo "🚀 Deploying Banking77 Task App for MIPROv2..."
echo "================================================"

# Set up environment variables.
# The key is generated in a plain assignment rather than inside
# `export VAR="$(...)"`: `export` would mask a failing python call and leave
# an empty key, whereas a plain assignment aborts the script under `set -e`.
generated_environment_key=0
if [ -z "${ENVIRONMENT_API_KEY:-}" ]; then
  ENVIRONMENT_API_KEY="$(python -c 'import secrets; print(secrets.token_urlsafe(32))')"
  generated_environment_key=1
fi
export ENVIRONMENT_API_KEY
export GROQ_API_KEY="${GROQ_API_KEY:-}"

# Check for required env vars
if [ -z "$GROQ_API_KEY" ]; then
  echo "❌ Error: GROQ_API_KEY not set"
  echo "Please set it: export GROQ_API_KEY=your_key"
  exit 1
fi

if [ "$generated_environment_key" -eq 1 ]; then
  # A freshly generated key exists only in this process. Print it in full so
  # it can be exported in the terminal that runs the optimizer; a truncated
  # preview would make the key unrecoverable.
  echo "✅ ENVIRONMENT_API_KEY (generated for this run): $ENVIRONMENT_API_KEY"
  echo "   Use the same key elsewhere: export ENVIRONMENT_API_KEY=$ENVIRONMENT_API_KEY"
else
  # Show only a short prefix of user-supplied secrets.
  echo "✅ ENVIRONMENT_API_KEY: ${ENVIRONMENT_API_KEY:0:6}..."
fi
echo "✅ GROQ_API_KEY: ${GROQ_API_KEY:0:6}..."

# Navigate to repo root
cd "$(dirname "$0")/../../.."

echo ""
echo "📦 Installing dependencies..."
# Best effort: keep going with the currently installed packages, but say so
# instead of hiding the failure.
if ! uv pip install -e . --quiet; then
  echo "⚠️ Warning: 'uv pip install -e .' failed; continuing with the installed packages" >&2
fi

echo ""
echo "🏃 Starting Banking77 task app on http://127.0.0.1:8102"
echo "Press Ctrl+C to stop"
echo ""

# Run the task app.
# NOTE(review): --host 0.0.0.0 listens on every interface, not only the
# loopback address shown in the banner above — confirm this is intended.
python -m examples.task_apps.banking77.banking77_task_app \
  --host 0.0.0.0 \
  --port 8102 \
  --reload
|
|
40
|
+
|
|
41
|
+
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
# Banking77 Multi-Step Pipeline (Classifier ➞ Calibrator)
|
|
2
|
+
|
|
3
|
+
This note explains how to spin up the new two-stage Banking77 task app and baseline. The pipeline mirrors the multi-module design from `monorepo/multi_step.md`: a classifier proposes an intent, then a calibrator confirms or adjusts it before reporting the final label.
|
|
4
|
+
|
|
5
|
+
## 1. Prerequisites
|
|
6
|
+
|
|
7
|
+
- Repo checked out and editable (the commands below use `/Users/joshpurtell/Documents/GitHub/synth-ai`; substitute the path of your own checkout)
|
|
8
|
+
- Python dependencies installed (`uv pip install -e .` or equivalent)
|
|
9
|
+
- Environment variables exported:
|
|
10
|
+
- `SYNTH_API_KEY`
|
|
11
|
+
- `ENVIRONMENT_API_KEY` (shared with task app)
|
|
12
|
+
- `GROQ_API_KEY` (policy model; optional if using OpenAI)
|
|
13
|
+
- `OPENAI_API_KEY` (meta-model for MIPRO proposals)
|
|
14
|
+
|
|
15
|
+
## 2. Task App: `banking77-pipeline`
|
|
16
|
+
|
|
17
|
+
The task app lives in `examples/task_apps/banking77_pipeline/`. It reuses the single-step dataset loader and router, but evaluates a two-step sequence inside `rollout_executor`.
|
|
18
|
+
|
|
19
|
+
### Local launch (uvicorn)
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
cd /path/to/synth-ai  # your local checkout of the repo
|
|
23
|
+
uvx synth-ai deploy banking77-pipeline --runtime uvicorn --port 8112 \
|
|
24
|
+
--env-file .env --follow
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
- Health check: `curl -H "X-API-Key: $ENVIRONMENT_API_KEY" http://127.0.0.1:8112/health`
|
|
28
|
+
- All inference calls must flow through the prompt-learning interceptor; both classifier and calibrator enforce `tool_choice="required"` on `banking77_classify`.
|
|
29
|
+
|
|
30
|
+
### Modal deploy (optional)
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
uvx synth-ai deploy banking77-pipeline --runtime modal --name banking77-pipeline-dev \
|
|
34
|
+
--env-file .env --follow
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## 3. Baseline Runner
|
|
38
|
+
|
|
39
|
+
`examples/baseline/banking77_pipeline_baseline.py` mirrors the online pipeline so you can measure performance without hosting the task app.
|
|
40
|
+
|
|
41
|
+
Example invocation:
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
cd /path/to/synth-ai  # your local checkout of the repo
|
|
45
|
+
uvx synth-ai baseline run banking77_pipeline --split train --seeds 0,1,2 --verbose
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Outputs include per-module tool calls, the final intent, and accuracy. Set `--output baseline_results.json` to store artifacts.
|
|
49
|
+
|
|
50
|
+
## 4. Prompt-Learning Configs (MIPROv2)
|
|
51
|
+
|
|
52
|
+
- Task app id: `prompt_learning.task_app_id = "banking77-pipeline"`
|
|
53
|
+
- Default URL points to Modal dev (`https://synth-laboratories-dev--synth-banking77-pipeline-web.modal.run`). Override by setting `TASK_APP_URL` if you run locally.
|
|
54
|
+
- New configs live in `examples/blog_posts/mipro/configs/`:
|
|
55
|
+
- `banking77_pipeline_mipro_local.toml` – main config for local backend runs
|
|
56
|
+
- `banking77_pipeline_mipro_test.toml` – reduced-iteration variant for CI smoke tests
|
|
57
|
+
- Each config includes:
|
|
58
|
+
- `prompt_learning.initial_prompt.metadata.pipeline_modules` with classifier/calibrator instruction text and few-shot placeholders
|
|
59
|
+
- Tuned iteration counts (5×2 for local, 2×2 for smoke) to keep latency manageable since every trial runs two modules
|
|
60
|
+
- Updated seed pools (`bootstrap_train`, `online_pool`, `test_pool`) sized for pipeline evaluation
|
|
61
|
+
|
|
62
|
+
### Running the Multi-Step Optimiser
|
|
63
|
+
|
|
64
|
+
1. Start the task app (port 8112)
|
|
65
|
+
`./examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh`
|
|
66
|
+
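2. Kick off optimisation (the example below targets the Modal dev deployment; to use the local app from step 1, set `TASK_APP_URL=http://127.0.0.1:8112` instead)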
|
|
67
|
+
`TASK_APP_URL=https://synth-laboratories-dev--synth-banking77-pipeline-web.modal.run \
|
|
68
|
+
./examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh`
|
|
69
|
+
|
|
70
|
+
The run script performs environment checks, verifies the pipeline health endpoint, and forwards the new config to the backend.
|
|
71
|
+
|
|
72
|
+
## 5. Checklist
|
|
73
|
+
|
|
74
|
+
- [x] Task app registered via `ModalDeploymentConfig` under `app_id="banking77-pipeline"`
|
|
75
|
+
- [x] Baseline present (`banking77_pipeline_baseline.py`)
|
|
76
|
+
- [x] Prompt-learning configs + helper scripts landed (`banking77_pipeline_mipro_*.toml`, run/deploy scripts)
|
|
77
|
+
- [ ] Multi-step CLI reporting + tests (pending once backend wiring lands)
|
|
78
|
+
|
|
79
|
+
Use this guide to verify the service locally before integration with the prompt-learning job runner.
|
|
@@ -0,0 +1,191 @@
|
|
|
1
|
+
#!/bin/bash
# Run MIPROv2 optimization for Banking77 against the backend

set -e

echo "🔬 Running MIPROv2 on Banking77"
echo "================================="

# Navigate to repo root
REPO_ROOT="$(cd "$(dirname "$0")/../../.." && pwd)"
cd "$REPO_ROOT"

# Save backend-related vars from environment before loading .env files (so they don't get overridden)
SAVED_BACKEND_BASE_URL="${BACKEND_BASE_URL:-}"
SAVED_SYNTH_BASE_URL="${SYNTH_BASE_URL:-}"

#######################################
# Export the KEY=VALUE assignments found in a dotenv-style file.
#
# Comment lines, blank lines and lines that are not NAME=VALUE assignments
# are ignored. Leading whitespace before NAME, a trailing carriage return
# (CRLF files) and one pair of matching surrounding quotes around VALUE are
# removed before exporting. A missing file is not an error.
#
# Globals:
#   SAVED_BACKEND_BASE_URL (read) - when non-empty, BACKEND_BASE_URL entries
#                                   in the file are skipped
#   SAVED_SYNTH_BASE_URL   (read) - when non-empty, SYNTH_BASE_URL entries
#                                   in the file are skipped
# Arguments:
#   $1 - path to the env file
# Outputs:
#   progress messages on stdout
# Returns:
#   0
#######################################
_load_env_file() {
  local env_file="$1"
  local line key value first_char last_char
  # Regexes live in variables so the shell never re-tokenises them.
  local skip_re='^[[:space:]]*(#|$)'
  local assign_re='^[[:space:]]*([A-Za-z_][A-Za-z0-9_]*)=(.*)$'

  [ -f "$env_file" ] || return 0
  echo "📝 Loading environment variables from $env_file..."

  # `|| [ -n "$line" ]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [ -n "$line" ]; do
    line="${line%$'\r'}"

    # Skip comments and empty lines
    [[ "$line" =~ $skip_re ]] && continue
    # Only export if it looks like KEY=VALUE
    [[ "$line" =~ $assign_re ]] || continue
    key="${BASH_REMATCH[1]}"
    value="${BASH_REMATCH[2]}"

    # Drop one pair of matching quotes: KEY="v" and KEY='v' both yield v.
    if (( ${#value} >= 2 )); then
      first_char="${value:0:1}"
      last_char="${value:${#value}-1:1}"
      if [[ "$first_char" == "$last_char" && ( "$first_char" == '"' || "$first_char" == "'" ) ]]; then
        value="${value:1:${#value}-2}"
      fi
    fi

    # Don't override backend URLs if they were set in environment
    case "$key" in
      BACKEND_BASE_URL)
        if [ -n "${SAVED_BACKEND_BASE_URL:-}" ]; then
          echo " ⚠️ Skipping BACKEND_BASE_URL from .env (using environment value)"
          continue
        fi
        ;;
      SYNTH_BASE_URL)
        if [ -n "${SAVED_SYNTH_BASE_URL:-}" ]; then
          echo " ⚠️ Skipping SYNTH_BASE_URL from .env (using environment value)"
          continue
        fi
        ;;
    esac

    export "$key=$value"
  done < "$env_file"
  return 0
}
|
|
50
|
+
|
|
51
|
+
# Ask whether to continue after a warning; exit 1 unless the answer is y/Y.
# A failed read (closed stdin, EOF) is treated as "no" rather than aborting
# mid-prompt under `set -e`.
_confirm_or_exit() {
  REPLY=""
  read -p "Continue anyway? (y/N) " -n 1 -r || true
  echo
  if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    exit 1
  fi
}

_load_env_file "$REPO_ROOT/.env"
_load_env_file "$REPO_ROOT/examples/rl/.env"

# Restore backend URLs from environment if they were set
if [ -n "${SAVED_BACKEND_BASE_URL:-}" ]; then
  export BACKEND_BASE_URL="$SAVED_BACKEND_BASE_URL"
  echo "✅ Using BACKEND_BASE_URL from environment: $BACKEND_BASE_URL"
fi
if [ -n "${SAVED_SYNTH_BASE_URL:-}" ]; then
  export SYNTH_BASE_URL="$SAVED_SYNTH_BASE_URL"
  echo "✅ Using SYNTH_BASE_URL from environment: $SYNTH_BASE_URL"
fi

# Check for required environment variables
if [ -z "${SYNTH_API_KEY:-}" ]; then
  echo "❌ Error: SYNTH_API_KEY not set"
  echo "Please get your API key from the backend and set it:"
  echo " export SYNTH_API_KEY=your_key"
  echo "Or add it to $REPO_ROOT/.env"
  exit 1
fi

if [ -z "${ENVIRONMENT_API_KEY:-}" ]; then
  echo "❌ Error: ENVIRONMENT_API_KEY not set"
  echo "Please set the same key used when deploying the task app:"
  echo " export ENVIRONMENT_API_KEY=your_key"
  echo "Or add it to $REPO_ROOT/.env"
  exit 1
fi

if [ -z "${GROQ_API_KEY:-}" ]; then
  echo "❌ Error: GROQ_API_KEY not set"
  echo "Please set your Groq API key:"
  echo " export GROQ_API_KEY=your_key"
  echo "Or add it to $REPO_ROOT/.env"
  exit 1
fi

# Check for OpenAI API key (needed for meta-model)
if [ -z "${OPENAI_API_KEY:-}" ]; then
  echo "⚠️ Warning: OPENAI_API_KEY not set"
  echo "MIPROv2 uses a meta-model (gpt-4o-mini) for prompt proposals."
  echo "Please set your OpenAI API key:"
  echo " export OPENAI_API_KEY=your_key"
  echo "Or add it to $REPO_ROOT/.env"
  echo ""
  _confirm_or_exit
fi

# Default to localhost backend if not specified
# Respect BACKEND_BASE_URL from environment (don't override if already set)
BACKEND_BASE_URL="${BACKEND_BASE_URL:-http://localhost:8000}"
BACKEND_URL="$BACKEND_BASE_URL"
# Ensure it doesn't have /api suffix for the base URL check (CLI will add it).
# A trailing slash is removed first so ".../api/" is handled too and the
# health URL never contains "//".
BACKEND_URL_NO_API="${BACKEND_URL%/}"
BACKEND_URL_NO_API="${BACKEND_URL_NO_API%/api}"

echo ""
echo "🔧 Debug Info:"
echo " BACKEND_BASE_URL from env: ${SAVED_BACKEND_BASE_URL:-<not set>}"
echo " BACKEND_BASE_URL current: $BACKEND_BASE_URL"
echo " BACKEND_URL: $BACKEND_URL"
echo ""

# Show only a short prefix of each secret so terminal or CI logs do not
# capture a large part of the key.
echo "✅ SYNTH_API_KEY: ${SYNTH_API_KEY:0:6}..."
echo "✅ ENVIRONMENT_API_KEY: ${ENVIRONMENT_API_KEY:0:6}..."
echo "✅ GROQ_API_KEY: ${GROQ_API_KEY:0:6}..."
if [ -n "${OPENAI_API_KEY:-}" ]; then
  echo "✅ OPENAI_API_KEY: ${OPENAI_API_KEY:0:6}..."
fi
echo "✅ Backend URL: $BACKEND_URL"
echo ""

# Already navigated to repo root above

# Check if task app is running
task_app_url="http://127.0.0.1:8102"
echo "🔍 Checking if Banking77 task app is running on $task_app_url..."
# --max-time bounds the probe so a stalled connection cannot hang the script.
if ! curl -s -f --max-time 10 -H "X-API-Key: $ENVIRONMENT_API_KEY" "$task_app_url/health" > /dev/null 2>&1; then
  echo "❌ Error: Banking77 task app is not running on $task_app_url"
  echo ""
  echo "Please start it first:"
  echo " ./examples/blog_posts/mipro/deploy_banking77_task_app.sh"
  echo ""
  echo "Or in another terminal:"
  echo " cd $(pwd)"
  echo " uvx synth-ai deploy banking77 --runtime uvicorn --port 8102"
  exit 1
fi
echo "✅ Task app is healthy"
echo ""

# Check backend connection
echo "🔍 Checking backend connection to $BACKEND_URL..."
if ! curl -s -f --max-time 10 "$BACKEND_URL_NO_API/api/health" > /dev/null 2>&1; then
  echo "⚠️ Warning: Cannot connect to backend at $BACKEND_URL"
  echo "Make sure the backend is running."
  _confirm_or_exit
else
  echo "✅ Backend is healthy"
fi
echo ""

config_path="examples/blog_posts/mipro/configs/banking77_mipro_local.toml"

echo "🚀 Starting MIPROv2 training..."
echo "Config: $config_path"
echo ""
echo "MIPROv2 Flow:"
echo " 1. Bootstrap Phase: Evaluate baseline on seeds [0-4], collect few-shot examples"
echo " 2. Optimization Loop: 16 iterations × 6 variants = 96 evaluations"
echo " 3. Final Evaluation: Test on held-out seeds [10-19]"
echo ""

# Export backend URLs so CLI respects them (overrides .env files)
export BACKEND_BASE_URL="$BACKEND_URL"
# Also set SYNTH_BASE_URL to match (CLI may check this as fallback)
# NOTE(review): this replaces any SYNTH_BASE_URL restored from the
# environment earlier in the script — confirm that is the intended precedence.
export SYNTH_BASE_URL="$BACKEND_URL"

echo "🚀 Running CLI with:"
echo " BACKEND_BASE_URL=$BACKEND_BASE_URL"
echo " SYNTH_BASE_URL=$SYNTH_BASE_URL"
echo " --backend=$BACKEND_URL"
echo ""

# Run the training
uvx synth-ai train \
  --type prompt_learning \
  --config "$config_path" \
  --backend "$BACKEND_URL" \
  --poll

echo ""
echo "✅ MIPROv2 training complete!"
|
|
191
|
+
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
#!/bin/bash
# Run MIPROv2 optimisation for the multi-step Banking77 pipeline against the backend

# Strict mode: abort on errors, on unset variables and on failures anywhere
# in a pipeline.
set -o errexit -o nounset -o pipefail

printf '%s\n' \
  "🔬 Running MIPROv2 on Banking77 Pipeline" \
  "========================================"

# Snapshot backend URLs supplied by the caller's environment before any .env
# file is read; an empty string means "not provided".
SAVED_BACKEND_BASE_URL="${BACKEND_BASE_URL:-}"
SAVED_SYNTH_BASE_URL="${SYNTH_BASE_URL:-}"

# The repository root is three directories above this script; every relative
# path used later is resolved from there.
REPO_ROOT="$(cd "$(dirname "$0")/../../.." && pwd)"
cd "$REPO_ROOT"
|
|
14
|
+
|
|
15
|
+
#######################################
# Load NAME=VALUE pairs from a dotenv-style file into the environment.
#
# Blank lines, comment lines and lines that are not a valid NAME=VALUE
# assignment are ignored. Leading whitespace, a trailing carriage return
# (CRLF files) and one pair of matching surrounding quotes are removed before
# the variable is exported, so indented or quoted entries end up in the
# environment with the intended value.
#
# Globals:
#   SAVED_BACKEND_BASE_URL (read) - when non-empty, BACKEND_BASE_URL entries
#     in the file are skipped.
#   SAVED_SYNTH_BASE_URL (read)   - when non-empty, SYNTH_BASE_URL entries in
#     the file are skipped.
# Arguments:
#   $1 - path of the env file; nothing happens if it is not a regular file.
# Outputs:
#   Progress and skip notices on stdout; export failures on stderr.
# Returns:
#   Always 0.
#######################################
_load_env_file() {
  local env_file="$1"
  local line key value
  local cr=$'\r'
  local assignment_re='^([A-Za-z_][A-Za-z0-9_]*)=(.*)$'

  [ -f "$env_file" ] || return 0

  echo "📝 Loading environment variables from $env_file..."
  # `|| [ -n "$line" ]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [ -n "$line" ]; do
    # Normalise the line: drop a CR left over from CRLF endings, then any
    # leading whitespace (export rejects " NAME=value").
    line="${line%"$cr"}"
    line="${line#"${line%%[![:space:]]*}"}"

    # Skip blank lines and comments.
    [[ -z "$line" || "$line" == \#* ]] && continue

    # Only well-formed NAME=VALUE assignments are considered.
    [[ "$line" =~ $assignment_re ]] || continue
    key="${BASH_REMATCH[1]}"
    value="${BASH_REMATCH[2]}"

    # Strip one pair of matching surrounding quotes ("value" or 'value').
    case "$value" in
      \"*\" | \'*\') value="${value:1:${#value}-2}" ;;
    esac

    # Backend URLs that the caller already set take precedence over the file.
    case "$key" in
      BACKEND_BASE_URL)
        if [ -n "${SAVED_BACKEND_BASE_URL:-}" ]; then
          echo " ⚠️ Skipping BACKEND_BASE_URL from .env (using environment value)"
          continue
        fi
        ;;
      SYNTH_BASE_URL)
        if [ -n "${SAVED_SYNTH_BASE_URL:-}" ]; then
          echo " ⚠️ Skipping SYNTH_BASE_URL from .env (using environment value)"
          continue
        fi
        ;;
    esac

    # Best effort: a failing export (e.g. a readonly variable) is reported
    # but does not abort the load.
    export "${key}=${value}" 2>/dev/null \
      || echo " ⚠️ Could not export $key from $env_file" >&2
  done < "$env_file"

  return 0
}
|
|
44
|
+
|
|
45
|
+
# Load optional .env files (a missing file is ignored by _load_env_file).
_load_env_file "$REPO_ROOT/.env"
_load_env_file "$REPO_ROOT/examples/rl/.env"

# Backend URLs that were already set when the script started win over
# anything read from the .env files.
if [ -n "$SAVED_BACKEND_BASE_URL" ]; then
  export BACKEND_BASE_URL="$SAVED_BACKEND_BASE_URL"
  echo "✅ Using BACKEND_BASE_URL from environment: $BACKEND_BASE_URL"
fi
if [ -n "$SAVED_SYNTH_BASE_URL" ]; then
  export SYNTH_BASE_URL="$SAVED_SYNTH_BASE_URL"
  echo "✅ Using SYNTH_BASE_URL from environment: $SYNTH_BASE_URL"
fi

# Hard requirements: without these keys the run cannot proceed. Fatal errors
# go to stderr, matching the other error paths of this script.
if [ -z "${SYNTH_API_KEY:-}" ]; then
  echo "❌ Error: SYNTH_API_KEY not set" >&2
  exit 1
fi

if [ -z "${ENVIRONMENT_API_KEY:-}" ]; then
  echo "❌ Error: ENVIRONMENT_API_KEY not set" >&2
  exit 1
fi

# Soft requirement: warn when no inference provider key is available.
if [ -z "${GROQ_API_KEY:-}" ] && [ -z "${OPENAI_API_KEY:-}" ]; then
  echo "⚠️ Warning: Neither GROQ_API_KEY nor OPENAI_API_KEY is set."
  echo "The policy defaults to Groq-hosted OSS models. Set GROQ_API_KEY to avoid failures."
fi

if [ -z "${OPENAI_API_KEY:-}" ]; then
  echo "⚠️ Warning: OPENAI_API_KEY not set (required for meta-model gpt-4o-mini)."
  # read returns non-zero at EOF (e.g. non-interactive stdin); with `set -e`
  # active that would end the script without explanation, so treat it as "no".
  read -p "Continue anyway? (y/N) " -n 1 -r || REPLY=""
  echo
  if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    echo "Aborting: OPENAI_API_KEY is not set." >&2
    exit 1
  fi
fi

# Resolve the backend URL, defaulting to a local backend.
if [ -z "${BACKEND_BASE_URL:-}" ]; then
  BACKEND_BASE_URL="http://localhost:8000"
fi
BACKEND_URL="$BACKEND_BASE_URL"
# Base URL without a trailing "/api" (used to build the health endpoint).
# A trailing slash is removed first so ".../api/" is handled as well.
BACKEND_URL_NO_API="${BACKEND_URL%/}"
BACKEND_URL_NO_API="${BACKEND_URL_NO_API%/api}"

echo ""
echo "🔧 Debug Info:"
echo " BACKEND_BASE_URL current: $BACKEND_BASE_URL"
echo " BACKEND_URL: $BACKEND_URL"
echo ""

# Confirm which credentials are in use. Only a short prefix is printed so
# that terminal scrollback or CI logs do not capture a large part of a key.
echo "✅ SYNTH_API_KEY: ${SYNTH_API_KEY:0:6}..."
echo "✅ ENVIRONMENT_API_KEY: ${ENVIRONMENT_API_KEY:0:6}..."
if [ -n "${GROQ_API_KEY:-}" ]; then
  echo "✅ GROQ_API_KEY: ${GROQ_API_KEY:0:6}..."
fi
if [ -n "${OPENAI_API_KEY:-}" ]; then
  echo "✅ OPENAI_API_KEY: ${OPENAI_API_KEY:0:6}..."
fi
echo "✅ Backend URL: $BACKEND_URL"
echo ""
|
|
103
|
+
|
|
104
|
+
CONFIG_PATH="examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml"

# Resolve TASK_APP_URL. Any pre-existing TASK_APP_URL shell variable is
# ignored: the value comes from the TOML config unless TASK_APP_URL_OVERRIDE
# is explicitly set.
if [ -n "${TASK_APP_URL_OVERRIDE:-}" ]; then
  TASK_APP_URL="$TASK_APP_URL_OVERRIDE"
  echo "📝 Using OVERRIDE task app URL: $TASK_APP_URL"
else
  if [ ! -f "$CONFIG_PATH" ]; then
    echo "❌ ERROR: config file not found: $CONFIG_PATH" >&2
    exit 1
  fi
  # Extract the first `task_app_url = "..."` entry with sed only (no Python
  # dependencies). `sed -n ... p` prints nothing when no line matches, and
  # `|| true` keeps `set -e`/`pipefail` from aborting inside the substitution,
  # so the explicit "not found" diagnostic below is actually reached.
  TASK_APP_URL="$(
    sed -nE 's/^[[:space:]]*task_app_url[[:space:]]*=[[:space:]]*"([^"]*)".*/\1/p' \
      "$CONFIG_PATH" | head -n 1
  )" || true
  if [ -z "$TASK_APP_URL" ]; then
    echo "❌ ERROR: task_app_url not found in $CONFIG_PATH" >&2
    echo " Please ensure the config file contains: task_app_url = \"...\"" >&2
    exit 1
  fi
  echo "📝 Task app URL from TOML: $TASK_APP_URL"
fi
echo ""
|
|
123
|
+
|
|
124
|
+
# The task app must answer its health endpoint (authenticated with
# ENVIRONMENT_API_KEY); otherwise print start-up hints and stop.
echo "🔍 Checking if Banking77 pipeline task app is running on ${TASK_APP_URL}..."
if ! curl -s -f -H "X-API-Key: $ENVIRONMENT_API_KEY" "$TASK_APP_URL/health" > /dev/null 2>&1; then
  # Fatal diagnostics go to stderr, like the other error paths of this script.
  cat >&2 <<EOF
❌ Error: Banking77 pipeline task app is not running on ${TASK_APP_URL}

Start it with:
 uvx synth-ai deploy banking77-pipeline --runtime uvicorn --port 8112 --env-file .env --follow
 # or deploy to Modal dev: modal deploy --env dev examples/task_apps/banking77_pipeline/deploy_wrapper.py
EOF
  exit 1
fi
echo "✅ Pipeline task app is healthy"
echo ""

# An unreachable backend is only a warning; the user decides whether to go on.
echo "🔍 Checking backend connection to $BACKEND_URL..."
if ! curl -s -f "$BACKEND_URL_NO_API/api/health" > /dev/null 2>&1; then
  echo "⚠️ Warning: Cannot connect to backend at $BACKEND_URL"
  # read returns non-zero at EOF (e.g. non-interactive stdin); with `set -e`
  # active that would end the script without explanation, so treat it as "no".
  read -p "Continue anyway? (y/N) " -n 1 -r || REPLY=""
  echo
  if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    echo "Aborting: backend at $BACKEND_URL is not reachable." >&2
    exit 1
  fi
else
  echo "✅ Backend is healthy"
fi
echo ""

echo "🚀 Starting MIPROv2 training..."
echo "Config: $CONFIG_PATH"
echo ""
echo "Multi-Step Flow:"
echo " 1. Bootstrap: two-module pipeline on seeds [0-14]"
echo " 2. Optimisation: 5 iterations × 2 variants (classifier + calibrator each evaluation)"
echo " 3. Held-out evaluation on seeds [40-49]"
echo ""

# Export both variables so the CLI sees the same backend that is passed via
# --backend.
export BACKEND_BASE_URL="$BACKEND_URL"
export SYNTH_BASE_URL="$BACKEND_URL"

# Launch the training job and poll until it finishes.
uvx synth-ai train \
  --type prompt_learning \
  --config "$CONFIG_PATH" \
  --backend "$BACKEND_URL" \
  --poll

echo ""
echo "✅ MIPROv2 pipeline training complete!"
|
|
171
|
+
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
#!/bin/bash
# Run MIPROv2 optimisation for the multi-step Banking77 pipeline using gemini-2.5-flash-lite as policy model

# Strict mode: abort on errors, on unset variables and on failures anywhere
# in a pipeline.
set -o errexit -o nounset -o pipefail

printf '%s\n' \
  "🔬 Running MIPROv2 on Banking77 Pipeline (Policy: gemini-2.5-flash-lite)" \
  "================================================================="

# Snapshot backend URLs supplied by the caller's environment before any .env
# file is read; an empty string means "not provided".
SAVED_BACKEND_BASE_URL="${BACKEND_BASE_URL:-}"
SAVED_SYNTH_BASE_URL="${SYNTH_BASE_URL:-}"

# The repository root is three directories above this script; every relative
# path used later is resolved from there.
REPO_ROOT="$(cd "$(dirname "$0")/../../.." && pwd)"
cd "$REPO_ROOT"
|
|
14
|
+
|
|
15
|
+
#######################################
# Load NAME=VALUE pairs from a dotenv-style file into the environment.
#
# Blank lines, comment lines and lines that are not a valid NAME=VALUE
# assignment are ignored. Leading whitespace, a trailing carriage return
# (CRLF files) and one pair of matching surrounding quotes are removed before
# the variable is exported, so indented or quoted entries end up in the
# environment with the intended value.
#
# Globals:
#   SAVED_BACKEND_BASE_URL (read) - when non-empty, BACKEND_BASE_URL entries
#     in the file are skipped.
#   SAVED_SYNTH_BASE_URL (read)   - when non-empty, SYNTH_BASE_URL entries in
#     the file are skipped.
# Arguments:
#   $1 - path of the env file; nothing happens if it is not a regular file.
# Outputs:
#   Progress and skip notices on stdout; export failures on stderr.
# Returns:
#   Always 0.
#######################################
_load_env_file() {
  local env_file="$1"
  local line key value
  local cr=$'\r'
  local assignment_re='^([A-Za-z_][A-Za-z0-9_]*)=(.*)$'

  [ -f "$env_file" ] || return 0

  echo "📝 Loading environment variables from $env_file..."
  # `|| [ -n "$line" ]` keeps a final line that lacks a trailing newline.
  while IFS= read -r line || [ -n "$line" ]; do
    # Normalise the line: drop a CR left over from CRLF endings, then any
    # leading whitespace (export rejects " NAME=value").
    line="${line%"$cr"}"
    line="${line#"${line%%[![:space:]]*}"}"

    # Skip blank lines and comments.
    [[ -z "$line" || "$line" == \#* ]] && continue

    # Only well-formed NAME=VALUE assignments are considered.
    [[ "$line" =~ $assignment_re ]] || continue
    key="${BASH_REMATCH[1]}"
    value="${BASH_REMATCH[2]}"

    # Strip one pair of matching surrounding quotes ("value" or 'value').
    case "$value" in
      \"*\" | \'*\') value="${value:1:${#value}-2}" ;;
    esac

    # Backend URLs that the caller already set take precedence over the file.
    case "$key" in
      BACKEND_BASE_URL)
        if [ -n "${SAVED_BACKEND_BASE_URL:-}" ]; then
          echo " ⚠️ Skipping BACKEND_BASE_URL from .env (using environment value)"
          continue
        fi
        ;;
      SYNTH_BASE_URL)
        if [ -n "${SAVED_SYNTH_BASE_URL:-}" ]; then
          echo " ⚠️ Skipping SYNTH_BASE_URL from .env (using environment value)"
          continue
        fi
        ;;
    esac

    # Best effort: a failing export (e.g. a readonly variable) is reported
    # but does not abort the load.
    export "${key}=${value}" 2>/dev/null \
      || echo " ⚠️ Could not export $key from $env_file" >&2
  done < "$env_file"

  return 0
}
|
|
44
|
+
|
|
45
|
+
# Load optional .env files (a missing file is ignored by _load_env_file).
_load_env_file "$REPO_ROOT/.env"
_load_env_file "$REPO_ROOT/examples/rl/.env"

# Backend URLs that were already set when the script started win over
# anything read from the .env files.
if [ -n "$SAVED_BACKEND_BASE_URL" ]; then
  export BACKEND_BASE_URL="$SAVED_BACKEND_BASE_URL"
  echo "✅ Using BACKEND_BASE_URL from environment: $BACKEND_BASE_URL"
fi
if [ -n "$SAVED_SYNTH_BASE_URL" ]; then
  export SYNTH_BASE_URL="$SAVED_SYNTH_BASE_URL"
  echo "✅ Using SYNTH_BASE_URL from environment: $SYNTH_BASE_URL"
fi

# Hard requirements: without these keys the run cannot proceed. Fatal errors
# go to stderr, matching the other error paths of this script.
if [ -z "${SYNTH_API_KEY:-}" ]; then
  echo "❌ Error: SYNTH_API_KEY not set" >&2
  exit 1
fi

if [ -z "${ENVIRONMENT_API_KEY:-}" ]; then
  echo "❌ Error: ENVIRONMENT_API_KEY not set" >&2
  exit 1
fi

# GEMINI_API_KEY must be present, either in the caller's environment or in
# one of the .env files loaded above (this script takes no command-line
# arguments). It is re-exported so child processes see it in both cases.
# Only a short prefix of any key is printed, to keep secrets out of logs.
if [ -n "${GEMINI_API_KEY:-}" ]; then
  export GEMINI_API_KEY="$GEMINI_API_KEY"
  echo "✅ GEMINI_API_KEY: ${GEMINI_API_KEY:0:6}..."
else
  echo "❌ Error: GEMINI_API_KEY not set" >&2
  echo " Set it with: export GEMINI_API_KEY=your_key" >&2
  exit 1
fi

if [ -z "${OPENAI_API_KEY:-}" ]; then
  echo "⚠️ Warning: OPENAI_API_KEY not set (required for meta-model gpt-4o-mini)."
  # read returns non-zero at EOF (e.g. non-interactive stdin); with `set -e`
  # active that would end the script without explanation, so treat it as "no".
  read -p "Continue anyway? (y/N) " -n 1 -r || REPLY=""
  echo
  if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    echo "Aborting: OPENAI_API_KEY is not set." >&2
    exit 1
  fi
fi

# Resolve the backend URL, defaulting to a local backend.
if [ -z "${BACKEND_BASE_URL:-}" ]; then
  BACKEND_BASE_URL="http://localhost:8000"
fi
BACKEND_URL="$BACKEND_BASE_URL"
# Base URL without a trailing "/api" (used to build the health endpoint).
# A trailing slash is removed first so ".../api/" is handled as well.
BACKEND_URL_NO_API="${BACKEND_URL%/}"
BACKEND_URL_NO_API="${BACKEND_URL_NO_API%/api}"

echo ""
echo "🔧 Debug Info:"
echo " BACKEND_BASE_URL current: $BACKEND_BASE_URL"
echo " BACKEND_URL: $BACKEND_URL"
echo ""

# Confirm which credentials are in use (short prefixes only).
echo "✅ SYNTH_API_KEY: ${SYNTH_API_KEY:0:6}..."
echo "✅ ENVIRONMENT_API_KEY: ${ENVIRONMENT_API_KEY:0:6}..."
echo "✅ GEMINI_API_KEY: ${GEMINI_API_KEY:0:6}..."
if [ -n "${OPENAI_API_KEY:-}" ]; then
  echo "✅ OPENAI_API_KEY: ${OPENAI_API_KEY:0:6}..."
fi
echo "✅ Backend URL: $BACKEND_URL"
echo ""
|
|
106
|
+
|
|
107
|
+
CONFIG_PATH="examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml"

# Resolve TASK_APP_URL. Any pre-existing TASK_APP_URL shell variable is
# ignored: the value comes from the TOML config unless TASK_APP_URL_OVERRIDE
# is explicitly set.
if [ -n "${TASK_APP_URL_OVERRIDE:-}" ]; then
  TASK_APP_URL="$TASK_APP_URL_OVERRIDE"
  echo "📝 Using OVERRIDE task app URL: $TASK_APP_URL"
else
  if [ ! -f "$CONFIG_PATH" ]; then
    echo "❌ ERROR: config file not found: $CONFIG_PATH" >&2
    exit 1
  fi
  # Extract the first `task_app_url = "..."` entry with sed only (no Python
  # dependencies). `sed -n ... p` prints nothing when no line matches, and
  # `|| true` keeps `set -e`/`pipefail` from aborting inside the substitution,
  # so the explicit "not found" diagnostic below is actually reached.
  TASK_APP_URL="$(
    sed -nE 's/^[[:space:]]*task_app_url[[:space:]]*=[[:space:]]*"([^"]*)".*/\1/p' \
      "$CONFIG_PATH" | head -n 1
  )" || true
  if [ -z "$TASK_APP_URL" ]; then
    echo "❌ ERROR: task_app_url not found in $CONFIG_PATH" >&2
    echo " Please ensure the config file contains: task_app_url = \"...\"" >&2
    exit 1
  fi
  echo "📝 Task app URL from TOML: $TASK_APP_URL"
fi
echo ""
|
|
126
|
+
|
|
127
|
+
# The task app must answer its health endpoint (authenticated with
# ENVIRONMENT_API_KEY); otherwise print start-up hints and stop.
echo "🔍 Checking if Banking77 pipeline task app is running on ${TASK_APP_URL}..."
if ! curl -s -f -H "X-API-Key: $ENVIRONMENT_API_KEY" "$TASK_APP_URL/health" > /dev/null 2>&1; then
  # Fatal diagnostics go to stderr, like the other error paths of this script.
  cat >&2 <<EOF
❌ Error: Banking77 pipeline task app is not running on ${TASK_APP_URL}

Start it with:
 uvx synth-ai deploy banking77-pipeline --runtime uvicorn --port 8112 --env-file .env --follow
 # or deploy to Modal dev: modal deploy --env dev examples/task_apps/banking77_pipeline/deploy_wrapper.py
EOF
  exit 1
fi
echo "✅ Pipeline task app is healthy"
echo ""

# An unreachable backend is only a warning; the user decides whether to go on.
echo "🔍 Checking backend connection to $BACKEND_URL..."
if ! curl -s -f "$BACKEND_URL_NO_API/api/health" > /dev/null 2>&1; then
  echo "⚠️ Warning: Cannot connect to backend at $BACKEND_URL"
  # read returns non-zero at EOF (e.g. non-interactive stdin); with `set -e`
  # active that would end the script without explanation, so treat it as "no".
  read -p "Continue anyway? (y/N) " -n 1 -r || REPLY=""
  echo
  if [[ ! $REPLY =~ ^[Yy]$ ]]; then
    echo "Aborting: backend at $BACKEND_URL is not reachable." >&2
    exit 1
  fi
else
  echo "✅ Backend is healthy"
fi
echo ""

echo "🚀 Starting MIPROv2 training with gemini-2.5-flash-lite as policy model..."
echo "Config: $CONFIG_PATH"
echo ""
echo "Multi-Step Flow:"
echo " 1. Bootstrap: two-module pipeline on seeds [0-14]"
echo " 2. Optimisation: 5 iterations × 2 variants (classifier + calibrator each evaluation)"
echo " 3. Held-out evaluation on seeds [40-49]"
echo " Policy model: gemini-2.5-flash-lite (Google)"
echo " Meta-model: gpt-4o-mini (OpenAI)"
echo ""

# Export both variables so the CLI sees the same backend that is passed via
# --backend.
export BACKEND_BASE_URL="$BACKEND_URL"
export SYNTH_BASE_URL="$BACKEND_URL"

# Build the CLI arguments. The repository-level .env is optional for this
# script (it is skipped when absent during loading), so --env-file is only
# passed when the file really exists.
# NOTE(review): confirm how the CLI behaves when --env-file is omitted.
train_args=(
  --type prompt_learning
  --config "$CONFIG_PATH"
  --backend "$BACKEND_URL"
)
if [ -f "$REPO_ROOT/.env" ]; then
  train_args+=(--env-file "$REPO_ROOT/.env")
fi
train_args+=(--poll)

# Launch the training job and poll until it finishes.
uvx synth-ai train "${train_args[@]}"

echo ""
echo "✅ MIPROv2 pipeline training complete!"
|
|
177
|
+
|