synth-ai 0.2.9.dev0__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- examples/README.md +1 -0
- examples/__init__.py +16 -0
- examples/analyze_semantic_words.sh +17 -0
- examples/baseline/banking77_baseline.py +243 -0
- examples/baseline/banking77_pipeline_baseline.py +294 -0
- examples/baseline/crafter_baseline.py +407 -0
- examples/baseline/pokemon_red_baseline.py +326 -0
- examples/baseline/simple_baseline.py +56 -0
- examples/baseline/warming_up_to_rl_baseline.py +239 -0
- examples/blog_posts/gepa/README.md +355 -0
- examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
- examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
- examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
- examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
- examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
- examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
- examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
- examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
- examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
- examples/blog_posts/gepa/gepa_baseline.py +204 -0
- examples/blog_posts/gepa/query_prompts_example.py +97 -0
- examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
- examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
- examples/blog_posts/gepa/task_apps.py +105 -0
- examples/blog_posts/gepa/test_gepa_local.sh +67 -0
- examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
- examples/blog_posts/mipro/README.md +415 -0
- examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
- examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
- examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
- examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
- examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
- examples/blog_posts/mipro/multi_step.md +79 -0
- examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
- examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
- examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/pokemon_vl/extract_images.py +239 -0
- examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
- examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
- examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
- examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
- examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
- examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
- examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
- examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
- examples/crafter_debug_render.py +186 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
- examples/gepa/banking77_pipeline_gepa.toml +96 -0
- examples/gepa/multi_stage_gepa_example.toml +84 -0
- examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
- examples/multi_step/SFT_README.md +147 -0
- examples/multi_step/configs/README_verilog_rl.md +77 -0
- examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
- examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
- examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
- examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
- examples/multi_step/configs/crafter_synth_backend.md +40 -0
- examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
- examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
- examples/multi_step/configs/verilog_rl_lora.toml +147 -0
- examples/multi_step/convert_traces_to_sft.py +84 -0
- examples/multi_step/crafter_rl_lora.md +70 -0
- examples/multi_step/judges/crafter_backend_judge.py +220 -0
- examples/multi_step/judges/verilog_backend_judge.py +234 -0
- examples/multi_step/readme.md +48 -0
- examples/multi_step/run_sft_qwen30b.sh +45 -0
- examples/multi_step/sse_metrics_streaming_notes.md +357 -0
- examples/multi_step/task_app_config_notes.md +494 -0
- examples/multi_step/verilog_rl_lora.md +218 -0
- examples/qwen_coder/README.md +102 -0
- examples/qwen_coder/_shared.py +113 -0
- examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
- examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
- examples/qwen_coder/configs/coder_lora_small.toml +57 -0
- examples/qwen_coder/generate_dataset.py +98 -0
- examples/qwen_coder/infer_ft_smoke.py +65 -0
- examples/qwen_coder/infer_prod_proxy.py +73 -0
- examples/qwen_coder/infer_via_synth.py +87 -0
- examples/qwen_coder/scripts/infer_coder.sh +19 -0
- examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
- examples/qwen_coder/sft_full_17b.py +103 -0
- examples/qwen_coder/sft_lora_30b.py +110 -0
- examples/qwen_coder/subset_jsonl.py +39 -0
- examples/qwen_coder/todos.md +38 -0
- examples/qwen_coder/validate_jsonl.py +60 -0
- examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
- examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
- examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
- examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
- examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
- examples/qwen_vl/QUICKSTART.md +327 -0
- examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
- examples/qwen_vl/README.md +152 -0
- examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
- examples/qwen_vl/RL_VISION_TESTING.md +333 -0
- examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
- examples/qwen_vl/SETUP_COMPLETE.md +274 -0
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
- examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
- examples/qwen_vl/__init__.py +2 -0
- examples/qwen_vl/collect_data_via_cli.md +415 -0
- examples/qwen_vl/collect_vision_traces.py +368 -0
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
- examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
- examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
- examples/qwen_vl/configs/filter_vision_test.toml +8 -0
- examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
- examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
- examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
- examples/qwen_vl/run_vision_comparison.sh +61 -0
- examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
- examples/qwen_vl/test_image_validation.py +201 -0
- examples/qwen_vl/test_sft_vision_data.py +110 -0
- examples/rl/README.md +169 -0
- examples/rl/configs/eval_base_qwen.toml +17 -0
- examples/rl/configs/eval_rl_qwen.toml +13 -0
- examples/rl/configs/rl_from_base_qwen.toml +62 -0
- examples/rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/rl/configs/rl_from_ft_qwen.toml +37 -0
- examples/rl/download_dataset.py +80 -0
- examples/rl/run_eval.py +436 -0
- examples/rl/run_rl_and_save.py +111 -0
- examples/rl/task_app/README.md +21 -0
- {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +188 -50
- examples/rl/task_app/math_task_app.py +111 -0
- examples/run_crafter_demo.sh +10 -0
- examples/sdk_prompt_learning_example.py +55 -0
- examples/sft/README.md +139 -0
- examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
- examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
- examples/sft/evaluate.py +117 -0
- examples/sft/export_dataset.py +120 -0
- examples/sft/generate_traces.py +164 -0
- examples/swe/__init__.py +12 -0
- examples/swe/task_app/README.md +135 -0
- examples/swe/task_app/__init__.py +2 -0
- examples/swe/task_app/grpo_swe_mini.py +604 -0
- examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
- examples/swe/task_app/hosted/README.md +173 -0
- examples/swe/task_app/hosted/__init__.py +5 -0
- examples/swe/task_app/hosted/branching.py +143 -0
- examples/swe/task_app/hosted/environment_routes.py +1289 -0
- examples/swe/task_app/hosted/envs/__init__.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
- examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
- examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
- examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
- examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
- examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
- examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
- examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
- examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
- examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
- examples/swe/task_app/hosted/hosted_app.py +204 -0
- examples/swe/task_app/hosted/inference/__init__.py +5 -0
- examples/swe/task_app/hosted/inference/openai_client.py +584 -0
- examples/swe/task_app/hosted/main.py +100 -0
- examples/swe/task_app/hosted/policy_routes.py +1094 -0
- examples/swe/task_app/hosted/registry.py +195 -0
- examples/swe/task_app/hosted/rollout.py +1905 -0
- examples/swe/task_app/hosted/storage/__init__.py +5 -0
- examples/swe/task_app/hosted/storage/volume.py +211 -0
- examples/swe/task_app/hosted/test_agents.py +161 -0
- examples/swe/task_app/hosted/test_service.py +136 -0
- examples/swe/task_app/hosted/utils.py +62 -0
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
- examples/task_apps/TESTING.md +275 -0
- examples/task_apps/banking77/__init__.py +6 -0
- examples/task_apps/banking77/banking77_task_app.py +912 -0
- examples/task_apps/banking77/deploy_wrapper.py +46 -0
- examples/task_apps/banking77_pipeline/__init__.py +6 -0
- examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
- examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
- examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
- examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
- examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
- examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
- examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
- examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
- examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
- examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
- examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
- examples/task_apps/crafter/task_app/README.md +42 -0
- examples/task_apps/crafter/task_app/__init__.py +5 -0
- examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
- examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
- examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
- examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
- examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
- examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
- examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
- examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
- examples/task_apps/enron/__init__.py +2 -0
- examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
- examples/task_apps/enron/filter_sft.toml +5 -0
- examples/task_apps/enron/task_app/README.md +14 -0
- examples/task_apps/enron/task_app/__init__.py +1 -0
- examples/task_apps/enron/task_app/grpo_enron.py +906 -0
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
- examples/task_apps/enron/tests/__init__.py +4 -0
- examples/task_apps/enron/tests/conftest.py +115 -0
- examples/task_apps/enron/tests/integration/__init__.py +4 -0
- examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
- examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
- examples/task_apps/enron/tests/unit/__init__.py +4 -0
- examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
- examples/task_apps/gepa_benchmarks/__init__.py +7 -0
- examples/task_apps/gepa_benchmarks/common.py +260 -0
- examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
- examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
- examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
- examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
- examples/task_apps/math/README.md +21 -0
- examples/task_apps/math/math_single_step.py +1000 -0
- examples/task_apps/math/math_task_app.py +115 -0
- examples/task_apps/pokemon_battle/__init__.py +2 -0
- examples/task_apps/pokemon_battle/modal_app.py +104 -0
- examples/task_apps/pokemon_battle/task_app/README.md +68 -0
- examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
- examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
- examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
- examples/task_apps/pokemon_red/README.md +356 -0
- examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
- examples/task_apps/pokemon_red/__init__.py +3 -0
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
- examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
- examples/task_apps/pokemon_red/task_app.py +1048 -0
- examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
- examples/task_apps/sokoban/README.md +306 -0
- examples/task_apps/sokoban/__init__.py +3 -0
- examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
- examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
- examples/task_apps/sokoban/filter_sft.toml +5 -0
- examples/task_apps/sokoban/task_app.py +1058 -0
- examples/task_apps/sokoban/tests/__init__.py +4 -0
- examples/task_apps/sokoban/tests/conftest.py +113 -0
- examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
- examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
- examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
- examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
- examples/task_apps/verilog/__init__.py +1 -0
- examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
- examples/task_apps/verilog/filter_sft.toml +5 -0
- examples/task_apps/verilog/task_app/README.md +12 -0
- examples/task_apps/verilog/task_app/__init__.py +1 -0
- examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
- examples/task_apps/verilog/tests/__init__.py +4 -0
- examples/task_apps/verilog/tests/conftest.py +115 -0
- examples/task_apps/verilog/tests/integration/__init__.py +4 -0
- examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
- examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
- examples/task_apps/verilog/tests/unit/__init__.py +4 -0
- examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
- examples/tunnel_gepa_banking77/README.md +106 -0
- examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
- examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
- examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
- examples/vlm/PROPOSAL.md +53 -0
- examples/vlm/README.md +68 -0
- examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
- examples/vlm/crafter_image_only_agent.py +207 -0
- examples/vlm/crafter_openai_vlm_agent.py +275 -0
- examples/vlm/filter_image_rows.py +63 -0
- examples/vlm/run_crafter_vlm_benchmark.py +316 -0
- examples/warming_up_to_rl/_utils.py +92 -0
- examples/warming_up_to_rl/analyze_trace_db.py +422 -0
- examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
- examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
- examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
- examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
- examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
- examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
- examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
- examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
- examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
- examples/warming_up_to_rl/export_trace_sft.py +837 -0
- examples/warming_up_to_rl/groq_test.py +97 -0
- examples/warming_up_to_rl/manage_secrets.py +131 -0
- examples/warming_up_to_rl/old/event_rewards.md +234 -0
- examples/warming_up_to_rl/old/notes.md +73 -0
- examples/warming_up_to_rl/readme.md +110 -0
- examples/warming_up_to_rl/run_eval.py +736 -0
- examples/warming_up_to_rl/run_fft_and_save.py +380 -0
- examples/warming_up_to_rl/run_local_rollout.py +239 -0
- examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
- examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
- examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
- examples/warming_up_to_rl/run_rl_and_save.py +124 -0
- examples/warming_up_to_rl/run_rollout_remote.py +156 -0
- examples/warming_up_to_rl/task_app/README.md +42 -0
- examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
- examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
- examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
- examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
- examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
- examples/workflows/math_rl/download_dataset.py +80 -0
- examples/workflows/math_rl/run_eval.py +436 -0
- examples/workflows/math_rl/run_rl_and_save.py +111 -0
- synth_ai/__init__.py +47 -23
- synth_ai/_utils/__init__.py +47 -0
- synth_ai/_utils/base_url.py +10 -0
- synth_ai/_utils/http.py +10 -0
- synth_ai/_utils/prompts.py +10 -0
- synth_ai/_utils/task_app_state.py +12 -0
- synth_ai/_utils/user_config.py +10 -0
- synth_ai/api/models/supported.py +514 -0
- synth_ai/api/train/__init__.py +60 -2
- synth_ai/api/train/builders.py +347 -39
- synth_ai/api/train/cli.py +895 -160
- synth_ai/api/train/config_finder.py +103 -25
- synth_ai/api/train/configs/__init__.py +65 -0
- synth_ai/api/train/configs/prompt_learning.py +496 -0
- synth_ai/api/train/configs/rl.py +188 -0
- synth_ai/api/train/configs/sft.py +99 -0
- synth_ai/api/train/configs/shared.py +81 -0
- synth_ai/api/train/env_resolver.py +70 -20
- synth_ai/api/train/pollers.py +29 -4
- synth_ai/api/train/prompt_learning.py +425 -0
- synth_ai/api/train/sft.py +390 -0
- synth_ai/api/train/supported_algos.py +147 -0
- synth_ai/api/train/task_app.py +6 -4
- synth_ai/api/train/utils.py +64 -52
- synth_ai/api/train/validators.py +1117 -0
- synth_ai/api/tunnel.py +49 -0
- synth_ai/auth/credentials.py +94 -0
- synth_ai/baseline/__init__.py +25 -0
- synth_ai/baseline/config.py +209 -0
- synth_ai/baseline/discovery.py +214 -0
- synth_ai/baseline/execution.py +146 -0
- synth_ai/cfgs.py +227 -0
- synth_ai/cli/__init__.py +85 -63
- synth_ai/cli/_modal_wrapper.py +31 -0
- synth_ai/cli/_storage.py +20 -0
- synth_ai/cli/_typer_patch.py +47 -0
- synth_ai/cli/_validate_task_app.py +29 -0
- synth_ai/cli/balance.py +16 -4
- synth_ai/cli/calc.py +36 -21
- synth_ai/cli/claude.py +70 -0
- synth_ai/cli/codex.py +267 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/baseline/__init__.py +12 -0
- synth_ai/cli/commands/baseline/core.py +637 -0
- synth_ai/cli/commands/baseline/list.py +93 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1112 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +185 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1437 -0
- synth_ai/cli/commands/status/__init__.py +66 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/session.py +183 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/subcommands/usage.py +203 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +200 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/validation.py +386 -0
- synth_ai/cli/demo.py +32 -140
- synth_ai/cli/deploy.py +233 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/legacy_root_backup.py +28 -22
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/task_app_discovery.py +7 -0
- synth_ai/cli/lib/task_app_env.py +518 -0
- synth_ai/cli/mcp.py +34 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/opencode.py +256 -0
- synth_ai/cli/recent.py +13 -7
- synth_ai/cli/rl_demo.py +156 -116
- synth_ai/cli/root.py +131 -132
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +49 -0
- synth_ai/cli/status.py +7 -125
- synth_ai/cli/task_app_deploy.py +7 -0
- synth_ai/cli/task_app_list.py +25 -0
- synth_ai/cli/task_app_modal_serve.py +11 -0
- synth_ai/cli/task_app_serve.py +11 -0
- synth_ai/cli/task_apps.py +2284 -257
- synth_ai/cli/traces.py +9 -5
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +5 -0
- synth_ai/cli/turso.py +73 -0
- synth_ai/cli/watch.py +13 -18
- synth_ai/demos/__init__.py +10 -0
- synth_ai/demos/core/__init__.py +28 -1
- synth_ai/demos/core/cli.py +579 -291
- synth_ai/demos/crafter/__init__.py +1 -0
- synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/demos/demo_registry.py +176 -0
- synth_ai/demos/demo_task_apps/__init__.py +3 -3
- synth_ai/demos/demo_task_apps/core.py +64 -28
- synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
- synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/app.py +2 -1
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +185 -83
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
- synth_ai/demos/math/__init__.py +1 -0
- synth_ai/demos/math/_common.py +16 -0
- synth_ai/demos/math/app.py +38 -0
- synth_ai/demos/math/config.toml +76 -0
- synth_ai/demos/math/deploy_modal.py +54 -0
- synth_ai/demos/math/modal_task_app.py +703 -0
- synth_ai/demos/math/task_app_entry.py +51 -0
- synth_ai/environments/environment/core.py +7 -1
- synth_ai/environments/examples/bandit/engine.py +12 -5
- synth_ai/environments/examples/bandit/environment.py +0 -1
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
- synth_ai/environments/examples/crafter_classic/environment.py +93 -2
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
- synth_ai/environments/examples/enron/engine.py +7 -2
- synth_ai/environments/examples/enron/environment.py +68 -0
- synth_ai/environments/examples/red/engine.py +60 -12
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
- synth_ai/environments/examples/red/environment.py +86 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/environments/examples/sokoban/taskset.py +116 -0
- synth_ai/environments/examples/verilog/engine.py +104 -12
- synth_ai/environments/examples/wordle/environment.py +0 -1
- synth_ai/environments/reproducibility/tree.py +5 -6
- synth_ai/environments/service/app.py +11 -12
- synth_ai/environments/service/core_routes.py +10 -9
- synth_ai/environments/stateful/engine.py +1 -1
- synth_ai/environments/tasks/core.py +1 -0
- synth_ai/environments/tasks/filters.py +5 -6
- synth_ai/environments/tasks/utils.py +4 -5
- synth_ai/evals/__init__.py +15 -0
- synth_ai/evals/base.py +14 -5
- synth_ai/evals/client.py +82 -0
- synth_ai/evals/types.py +42 -0
- synth_ai/http.py +8 -22
- synth_ai/http_client.py +45 -12
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +21 -7
- synth_ai/jobs/client.py +129 -80
- synth_ai/judge_schemas.py +127 -0
- synth_ai/learning/__init__.py +51 -6
- synth_ai/learning/algorithms.py +14 -0
- synth_ai/learning/client.py +122 -30
- synth_ai/learning/config.py +2 -40
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +4 -56
- synth_ai/learning/health.py +14 -8
- synth_ai/learning/jobs.py +43 -47
- synth_ai/learning/prompt_learning_client.py +276 -0
- synth_ai/learning/prompt_learning_types.py +185 -0
- synth_ai/{rl → learning/rl}/__init__.py +14 -5
- synth_ai/learning/rl/client.py +269 -0
- synth_ai/learning/rl/config.py +31 -0
- synth_ai/{rl → learning/rl}/contracts.py +5 -10
- synth_ai/{rl → learning/rl}/env_keys.py +45 -16
- synth_ai/learning/rl/secrets.py +13 -0
- synth_ai/learning/rl_client.py +2 -253
- synth_ai/learning/sft/__init__.py +29 -0
- synth_ai/learning/sft/client.py +68 -0
- synth_ai/learning/sft/config.py +270 -0
- synth_ai/learning/sft/data.py +698 -0
- synth_ai/learning/sse.py +25 -26
- synth_ai/learning/validators.py +29 -25
- synth_ai/mcp/__init__.py +5 -0
- synth_ai/mcp/__main__.py +8 -0
- synth_ai/mcp/main.py +254 -0
- synth_ai/mcp/setup.py +100 -0
- synth_ai/modal.py +257 -0
- synth_ai/pricing/__init__.py +3 -0
- synth_ai/pricing/model_pricing.py +64 -0
- synth_ai/session/__init__.py +75 -0
- synth_ai/session/client.py +383 -0
- synth_ai/session/constants.py +63 -0
- synth_ai/session/exceptions.py +105 -0
- synth_ai/session/manager.py +139 -0
- synth_ai/session/models.py +89 -0
- synth_ai/session/query.py +110 -0
- synth_ai/spec/__init__.py +46 -0
- synth_ai/spec/dataclasses.py +149 -0
- synth_ai/spec/loader.py +144 -0
- synth_ai/spec/serializer.py +199 -0
- synth_ai/spec/validation.py +250 -0
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +589 -0
- synth_ai/streaming/streamer.py +320 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/__init__.py +50 -30
- synth_ai/task/apps/__init__.py +63 -19
- synth_ai/task/auth.py +35 -23
- synth_ai/task/client.py +15 -13
- synth_ai/task/config.py +261 -0
- synth_ai/task/contracts.py +165 -64
- synth_ai/task/datasets.py +9 -6
- synth_ai/task/errors.py +11 -10
- synth_ai/task/health.py +17 -11
- synth_ai/task/inference_api.py +101 -0
- synth_ai/task/json.py +58 -24
- synth_ai/task/proxy.py +59 -66
- synth_ai/task/rubrics/__init__.py +55 -0
- synth_ai/task/rubrics/loaders.py +156 -0
- synth_ai/task/rubrics/models.py +57 -0
- synth_ai/task/rubrics/scoring.py +116 -0
- synth_ai/task/rubrics/strict.py +149 -0
- synth_ai/task/rubrics.py +22 -15
- synth_ai/task/server.py +65 -31
- synth_ai/task/trace_correlation_helpers.py +328 -0
- synth_ai/task/tracing_utils.py +44 -28
- synth_ai/task/validators.py +449 -6
- synth_ai/task/vendors.py +5 -7
- synth_ai/tracing_v3/__init__.py +4 -0
- synth_ai/tracing_v3/abstractions.py +21 -4
- synth_ai/tracing_v3/config.py +167 -22
- synth_ai/tracing_v3/constants.py +21 -0
- synth_ai/tracing_v3/db_config.py +42 -29
- synth_ai/tracing_v3/decorators.py +80 -45
- synth_ai/tracing_v3/examples/basic_usage.py +15 -9
- synth_ai/tracing_v3/hooks.py +6 -4
- synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/tracing_v3/replica_sync.py +12 -7
- synth_ai/tracing_v3/serialization.py +130 -0
- synth_ai/tracing_v3/session_tracer.py +73 -16
- synth_ai/tracing_v3/storage/base.py +89 -1
- synth_ai/tracing_v3/storage/config.py +63 -16
- synth_ai/tracing_v3/storage/factory.py +11 -9
- synth_ai/tracing_v3/storage/utils.py +15 -11
- synth_ai/tracing_v3/trace_utils.py +317 -0
- synth_ai/tracing_v3/turso/__init__.py +8 -21
- synth_ai/tracing_v3/turso/daemon.py +123 -15
- synth_ai/tracing_v3/turso/models.py +5 -2
- synth_ai/tracing_v3/turso/native_manager.py +1293 -0
- synth_ai/tracing_v3/utils.py +5 -4
- synth_ai/tunnel.py +143 -0
- synth_ai/tunnel_deploy.py +278 -0
- synth_ai/types.py +8 -0
- synth_ai/urls.py +11 -0
- synth_ai/utils/__init__.py +166 -0
- synth_ai/utils/agents.py +74 -0
- synth_ai/utils/apps.py +152 -0
- synth_ai/utils/base_url.py +94 -0
- synth_ai/utils/bin.py +39 -0
- synth_ai/utils/claude.py +36 -0
- synth_ai/utils/cli.py +284 -0
- synth_ai/utils/config.py +81 -0
- synth_ai/utils/env.py +346 -0
- synth_ai/utils/errors.py +85 -0
- synth_ai/utils/http.py +172 -0
- synth_ai/utils/json.py +72 -0
- synth_ai/utils/log_filter.py +99 -0
- synth_ai/utils/logging.py +198 -0
- synth_ai/utils/modal.py +299 -0
- synth_ai/utils/paths.py +95 -0
- synth_ai/utils/process.py +233 -0
- synth_ai/utils/prompts.py +39 -0
- synth_ai/utils/sqld.py +122 -0
- synth_ai/utils/ssl.py +25 -0
- synth_ai/utils/task_app_discovery.py +882 -0
- synth_ai/utils/task_app_env.py +186 -0
- synth_ai/utils/task_app_state.py +318 -0
- synth_ai/utils/tunnel/__init__.py +12 -0
- synth_ai/utils/tunnel/config.py +55 -0
- synth_ai/utils/user_config.py +137 -0
- synth_ai/uvicorn.py +77 -0
- synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
- synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
- synth_ai/cli/man.py +0 -106
- synth_ai/core/experiment.py +0 -15
- synth_ai/core/system.py +0 -15
- synth_ai/demo_registry.py +0 -258
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/handshake.py +0 -107
- synth_ai/install_sqld.sh +0 -40
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/lm/__init__.py +0 -51
- synth_ai/lm/caching/constants.py +0 -6
- synth_ai/lm/caching/dbs.py +0 -0
- synth_ai/lm/caching/ephemeral.py +0 -102
- synth_ai/lm/caching/handler.py +0 -137
- synth_ai/lm/caching/initialize.py +0 -11
- synth_ai/lm/caching/persistent.py +0 -114
- synth_ai/lm/config.py +0 -110
- synth_ai/lm/constants.py +0 -32
- synth_ai/lm/core/__init__.py +0 -8
- synth_ai/lm/core/all.py +0 -73
- synth_ai/lm/core/exceptions.py +0 -7
- synth_ai/lm/core/main.py +0 -319
- synth_ai/lm/core/main_v3.py +0 -594
- synth_ai/lm/core/synth_models.py +0 -48
- synth_ai/lm/core/vendor_clients.py +0 -188
- synth_ai/lm/cost/monitor.py +0 -1
- synth_ai/lm/cost/statefulness.py +0 -1
- synth_ai/lm/injection.py +0 -80
- synth_ai/lm/overrides.py +0 -206
- synth_ai/lm/provider_support/__init__.py +0 -8
- synth_ai/lm/provider_support/anthropic.py +0 -972
- synth_ai/lm/provider_support/openai.py +0 -1139
- synth_ai/lm/provider_support/suppress_logging.py +0 -31
- synth_ai/lm/structured_outputs/handler.py +0 -440
- synth_ai/lm/structured_outputs/inject.py +0 -297
- synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
- synth_ai/lm/tools/__init__.py +0 -3
- synth_ai/lm/tools/base.py +0 -172
- synth_ai/lm/unified_interface.py +0 -202
- synth_ai/lm/vendors/base.py +0 -81
- synth_ai/lm/vendors/core/anthropic_api.py +0 -387
- synth_ai/lm/vendors/core/gemini_api.py +0 -292
- synth_ai/lm/vendors/core/mistral_api.py +0 -322
- synth_ai/lm/vendors/core/openai_api.py +0 -225
- synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
- synth_ai/lm/vendors/local/ollama.py +0 -0
- synth_ai/lm/vendors/openai_standard.py +0 -780
- synth_ai/lm/vendors/openai_standard_responses.py +0 -256
- synth_ai/lm/vendors/retries.py +0 -22
- synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
- synth_ai/lm/vendors/supported/deepseek.py +0 -69
- synth_ai/lm/vendors/supported/grok.py +0 -75
- synth_ai/lm/vendors/supported/groq.py +0 -16
- synth_ai/lm/vendors/supported/ollama.py +0 -15
- synth_ai/lm/vendors/supported/openrouter.py +0 -74
- synth_ai/lm/vendors/supported/together.py +0 -11
- synth_ai/lm/vendors/synth_client.py +0 -808
- synth_ai/lm/warmup.py +0 -186
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/task/apps/grpo_crafter.py +0 -438
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/turso/manager.py +0 -774
- synth_ai/v0/tracing/abstractions.py +0 -224
- synth_ai/v0/tracing/base_client.py +0 -91
- synth_ai/v0/tracing/client_manager.py +0 -131
- synth_ai/v0/tracing/config.py +0 -142
- synth_ai/v0/tracing/context.py +0 -146
- synth_ai/v0/tracing/decorators.py +0 -682
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/v0/tracing/events/manage.py +0 -147
- synth_ai/v0/tracing/events/scope.py +0 -86
- synth_ai/v0/tracing/events/store.py +0 -228
- synth_ai/v0/tracing/immediate_client.py +0 -151
- synth_ai/v0/tracing/local.py +0 -18
- synth_ai/v0/tracing/log_client_base.py +0 -73
- synth_ai/v0/tracing/retry_queue.py +0 -186
- synth_ai/v0/tracing/trackers.py +0 -515
- synth_ai/v0/tracing/upload.py +0 -512
- synth_ai/v0/tracing/utils.py +0 -9
- synth_ai/v0/tracing_v1/__init__.py +0 -16
- synth_ai/v0/tracing_v1/abstractions.py +0 -224
- synth_ai/v0/tracing_v1/base_client.py +0 -91
- synth_ai/v0/tracing_v1/client_manager.py +0 -131
- synth_ai/v0/tracing_v1/config.py +0 -142
- synth_ai/v0/tracing_v1/context.py +0 -146
- synth_ai/v0/tracing_v1/decorators.py +0 -703
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/v0/tracing_v1/events/manage.py +0 -147
- synth_ai/v0/tracing_v1/events/scope.py +0 -86
- synth_ai/v0/tracing_v1/events/store.py +0 -228
- synth_ai/v0/tracing_v1/immediate_client.py +0 -151
- synth_ai/v0/tracing_v1/local.py +0 -18
- synth_ai/v0/tracing_v1/log_client_base.py +0 -73
- synth_ai/v0/tracing_v1/retry_queue.py +0 -186
- synth_ai/v0/tracing_v1/trackers.py +0 -515
- synth_ai/v0/tracing_v1/upload.py +0 -527
- synth_ai/v0/tracing_v1/utils.py +0 -9
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.9.dev0.dist-info/METADATA +0 -131
- synth_ai-0.2.9.dev0.dist-info/RECORD +0 -444
- {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
- {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
- {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
- {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
- {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
- {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
- {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
- {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
- /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
- /synth_ai/{learning/filtering.py → py.typed} +0 -0
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
# Banking77 GEPA Optimization via Cloudflare Tunnel
|
|
2
|
+
|
|
3
|
+
This example demonstrates how to use Cloudflare Tunnel to expose a local Banking77 task app to Synth's production backend for GEPA prompt optimization.
|
|
4
|
+
|
|
5
|
+
## Overview
|
|
6
|
+
|
|
7
|
+
Instead of deploying to Modal or running a local backend, this example:
|
|
8
|
+
1. Deploys the Banking77 task app locally
|
|
9
|
+
2. Exposes it via Cloudflare Tunnel (free quick tunnels)
|
|
10
|
+
3. Runs GEPA optimization against Synth's production backend
|
|
11
|
+
|
|
12
|
+
## Prerequisites
|
|
13
|
+
|
|
14
|
+
- `cloudflared` installed ([install guide](https://developers.cloudflare.com/cloudflare-one/networks/connectors/cloudflare-tunnel/do-more-with-tunnels/local-management/create-local-tunnel/))
|
|
15
|
+
- `SYNTH_API_KEY` set (get from [Synth Dashboard](https://app.usesynth.ai/api-keys))
|
|
16
|
+
- `ENVIRONMENT_API_KEY` set (optional; `run_gepa_with_tunnel.sh` generates one if it is unset)
|
|
17
|
+
- `GROQ_API_KEY` set (optional, for LLM-guided mutations)
|
|
18
|
+
|
|
19
|
+
## Quick Start
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
# Run the example
|
|
23
|
+
./examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
The script will:
|
|
27
|
+
1. ✅ Check prerequisites
|
|
28
|
+
2. 🌐 Deploy Banking77 via Cloudflare Tunnel (quick mode)
|
|
29
|
+
3. 📝 Create GEPA config with tunnel URL
|
|
30
|
+
4. 🎯 Run GEPA optimization against prod backend
|
|
31
|
+
5. 📊 Show results
|
|
32
|
+
|
|
33
|
+
## What Happens
|
|
34
|
+
|
|
35
|
+
1. **Tunnel Deployment**: Creates an ephemeral `*.trycloudflare.com` URL
|
|
36
|
+
2. **Config Generation**: Creates `banking77_gepa_tunnel.toml` with the tunnel URL
|
|
37
|
+
3. **GEPA Training**: Runs prompt optimization using Synth's production backend
|
|
38
|
+
4. **Results**: View results in the Synth dashboard
|
|
39
|
+
|
|
40
|
+
## Manual Steps
|
|
41
|
+
|
|
42
|
+
If you prefer to run steps manually:
|
|
43
|
+
|
|
44
|
+
### Step 1: Deploy Tunnel
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
uvx synth-ai deploy \
|
|
48
|
+
--task-app examples/task_apps/banking77/banking77_task_app.py \
|
|
49
|
+
--runtime tunnel \
|
|
50
|
+
--tunnel-mode quick \
|
|
51
|
+
--port 8102 \
|
|
52
|
+
--env .env.tunnel
|
|
53
|
+
```
|
|
54
|
+
|
|
55
|
+
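This writes `TASK_APP_URL` to `.env.tunnel`. Quick-tunnel URLs are ephemeral, so make sure `task_app_url` in `banking77_gepa_tunnel.toml` matches the new URL before you start training.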
|
|
56
|
+
|
|
57
|
+
### Step 2: Run GEPA
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
export BACKEND_BASE_URL="https://api.usesynth.ai"
|
|
61
|
+
export TASK_APP_URL=$(grep TASK_APP_URL .env.tunnel | cut -d'=' -f2)
|
|
62
|
+
|
|
63
|
+
uvx synth-ai train \
|
|
64
|
+
--type prompt_learning \
|
|
65
|
+
--config examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml \
|
|
66
|
+
--backend https://api.usesynth.ai \
|
|
67
|
+
--poll
|
|
68
|
+
```
|
|
69
|
+
|
|
70
|
+
## Configuration
|
|
71
|
+
|
|
72
|
+
The generated config (`banking77_gepa_tunnel.toml`) includes:
|
|
73
|
+
- **Task App URL**: Cloudflare Tunnel URL (e.g., `https://abc123.trycloudflare.com`)
|
|
74
|
+
- **Backend**: Production backend (`https://api.usesynth.ai`)
|
|
75
|
+
- **GEPA Settings**: Standard Banking77 optimization parameters
|
|
76
|
+
|
|
77
|
+
## Notes
|
|
78
|
+
|
|
79
|
+
- **Quick Tunnels**: Ephemeral, free, no account needed
|
|
80
|
+
- **Tunnel Lifetime**: The tunnel closes when the deployment process stops
|
|
81
|
+
- **Production Backend**: Uses Synth's hosted backend (no local setup needed)
|
|
82
|
+
- **Credentials**: Saved to `.env.tunnel` for reuse
|
|
83
|
+
|
|
84
|
+
## Troubleshooting
|
|
85
|
+
|
|
86
|
+
### Tunnel not accessible
|
|
87
|
+
- Check `cloudflared` is installed: `which cloudflared`
|
|
88
|
+
- Verify tunnel process is running
|
|
89
|
+
- Check firewall/network settings
|
|
90
|
+
|
|
91
|
+
### Backend connection failed
|
|
92
|
+
- Verify `SYNTH_API_KEY` is set correctly
|
|
93
|
+
- Check network connectivity to `https://api.usesynth.ai`
|
|
94
|
+
- Ensure API key has proper permissions
|
|
95
|
+
|
|
96
|
+
### Task app health check fails
|
|
97
|
+
- Verify `ENVIRONMENT_API_KEY` matches the one used in deployment
|
|
98
|
+
- Check task app logs for errors
|
|
99
|
+
- Ensure the task app exposes a `/health` endpoint
|
|
100
|
+
|
|
101
|
+
## Related Examples
|
|
102
|
+
|
|
103
|
+
- Local deployment: `examples/blog_posts/gepa/deploy_banking77_task_app.sh`
|
|
104
|
+
- Modal deployment: `examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh`
|
|
105
|
+
- GEPA configs: `examples/blog_posts/gepa/configs/`
|
|
106
|
+
|
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
[prompt_learning]
|
|
2
|
+
algorithm = "gepa"
|
|
3
|
+
task_app_url = "https://contains-voluntary-lip-carey.trycloudflare.com"
|
|
4
|
+
task_app_id = "banking77"
|
|
5
|
+
|
|
6
|
+
# Initial prompt pattern (pattern-based mode)
|
|
7
|
+
[prompt_learning.initial_prompt]
|
|
8
|
+
id = "banking77_pattern"
|
|
9
|
+
name = "Banking77 Classification Pattern"
|
|
10
|
+
|
|
11
|
+
[[prompt_learning.initial_prompt.messages]]
|
|
12
|
+
role = "system"
|
|
13
|
+
pattern = "You are an expert banking assistant. \n\n**Available Banking Intents:**\n{available_intents}\n\n**Task:**\nCall the `banking77_classify` tool with the `intent` parameter set to ONE of the intent labels listed above that best matches the customer query. The intent must be an exact match from the list."
|
|
14
|
+
order = 0
|
|
15
|
+
|
|
16
|
+
[[prompt_learning.initial_prompt.messages]]
|
|
17
|
+
role = "user"
|
|
18
|
+
pattern = "Customer Query: {query}\n\nClassify this query by calling the tool with the correct intent label from the list above."
|
|
19
|
+
order = 1
|
|
20
|
+
|
|
21
|
+
[prompt_learning.initial_prompt.wildcards]
|
|
22
|
+
query = "REQUIRED" # Will be provided by task app at runtime
|
|
23
|
+
available_intents = "OPTIONAL" # Intent list (numbered 1-77) will be provided by task app
|
|
24
|
+
|
|
25
|
+
# Policy configuration (model, provider, etc.)
|
|
26
|
+
[prompt_learning.policy]
|
|
27
|
+
inference_mode = "synth_hosted"
|
|
28
|
+
model = "openai/gpt-oss-20b"
|
|
29
|
+
provider = "groq"
|
|
30
|
+
temperature = 0.0
|
|
31
|
+
max_completion_tokens = 512
|
|
32
|
+
policy_name = "banking77-classifier" # Required for Banking77 task app
|
|
33
|
+
|
|
34
|
+
# Training split config
|
|
35
|
+
[prompt_learning.env_config]
|
|
36
|
+
pool = "train"
|
|
37
|
+
|
|
38
|
+
# GEPA-specific configuration with nested subsections (mirrors RL structure)
|
|
39
|
+
[prompt_learning.gepa]
|
|
40
|
+
env_name = "banking77"
|
|
41
|
+
proposer_type = "dspy"
|
|
42
|
+
|
|
43
|
+
# Rollout configuration (mirrors RL [rollout] section)
|
|
44
|
+
[prompt_learning.gepa.rollout]
|
|
45
|
+
budget = 100
|
|
46
|
+
max_concurrent = 20
|
|
47
|
+
minibatch_size = 10
|
|
48
|
+
|
|
49
|
+
# Evaluation configuration (mirrors RL [evaluation] section)
|
|
50
|
+
[prompt_learning.gepa.evaluation]
|
|
51
|
+
seeds = [
|
|
52
|
+
50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
|
|
53
|
+
60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
|
|
54
|
+
70, 71, 72, 73, 74, 75, 76, 77, 78, 79
|
|
55
|
+
] # Training seeds (30 seeds from train pool)
|
|
56
|
+
validation_seeds = [
|
|
57
|
+
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
|
|
58
|
+
10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
|
|
59
|
+
20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
|
|
60
|
+
30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
|
|
61
|
+
40, 41, 42, 43, 44, 45, 46, 47, 48, 49
|
|
62
|
+
] # Held-out validation seeds (50 seeds from validation pool - not in training)
|
|
63
|
+
validation_pool = "validation"
|
|
64
|
+
validation_top_k = 3
|
|
65
|
+
test_pool = [2, 3] # Test pool for final evaluation (small held-out set)
|
|
66
|
+
|
|
67
|
+
# Mutation configuration (LLM-guided mutation settings)
|
|
68
|
+
[prompt_learning.gepa.mutation]
|
|
69
|
+
rate = 0.3
|
|
70
|
+
llm_model = "openai/gpt-oss-120b"
|
|
71
|
+
llm_provider = "groq"
|
|
72
|
+
llm_inference_url = "https://api.groq.com/openai/v1"
|
|
73
|
+
|
|
74
|
+
# Population configuration (evolution parameters)
|
|
75
|
+
[prompt_learning.gepa.population]
|
|
76
|
+
initial_size = 10
|
|
77
|
+
num_generations = 3
|
|
78
|
+
children_per_generation = 12
|
|
79
|
+
crossover_rate = 0.5
|
|
80
|
+
selection_pressure = 1.0
|
|
81
|
+
patience_generations = 3
|
|
82
|
+
|
|
83
|
+
# Archive configuration (Pareto archive settings)
|
|
84
|
+
[prompt_learning.gepa.archive]
|
|
85
|
+
size = 40
|
|
86
|
+
pareto_set_size = 32
|
|
87
|
+
pareto_eps = 1e-6
|
|
88
|
+
feedback_fraction = 0.5
|
|
89
|
+
|
|
90
|
+
# Token and budget configuration
|
|
91
|
+
[prompt_learning.gepa.token]
|
|
92
|
+
# max_limit = 1000 # Uncomment to set a token limit
|
|
93
|
+
counting_model = "gpt-4"
|
|
94
|
+
enforce_pattern_limit = true
|
|
95
|
+
# max_spend_usd = 100.0 # Uncomment to set a budget cap
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Helper script to deploy tunnel and keep it running while training runs."""
|
|
3
|
+
import asyncio
|
|
4
|
+
import os
|
|
5
|
+
import signal
|
|
6
|
+
import sys
|
|
7
|
+
import time
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
# Add repo to path
|
|
11
|
+
repo_root = Path(__file__).parent.parent.parent
|
|
12
|
+
sys.path.insert(0, str(repo_root))
|
|
13
|
+
|
|
14
|
+
from synth_ai.cfgs import CloudflareTunnelDeployCfg
|
|
15
|
+
from synth_ai.tunnel_deploy import deploy_app_tunnel, _TUNNEL_PROCESSES
|
|
16
|
+
from synth_ai.tunnel import stop_tunnel
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
async def main():
    """Deploy the tunnel and keep this process alive while it runs.

    Optional positional command-line arguments:
        1. port: port passed to the tunnel deployment (default ``8102``).
        2. env_file: file handed to ``deploy_app_tunnel``; reported to the
           user as the place the tunnel URL is written (default
           ``.env.tunnel``).
        3. task_app_path: path to the task app module (default is the
           Banking77 example task app).

    ``ENVIRONMENT_API_KEY`` is read from the environment and falls back to
    ``"test-key"`` when it is not set.

    The function returns when the tracked tunnel process exits or when the
    user interrupts the script; in both cases the tunnel registered for
    ``port`` is stopped and removed from ``_TUNNEL_PROCESSES``.
    """
    port = int(sys.argv[1]) if len(sys.argv) > 1 else 8102
    env_file = Path(sys.argv[2]) if len(sys.argv) > 2 else Path(".env.tunnel")
    task_app_path = (
        Path(sys.argv[3])
        if len(sys.argv) > 3
        else Path("examples/task_apps/banking77/banking77_task_app.py")
    )
    env_api_key = os.environ.get("ENVIRONMENT_API_KEY", "test-key")

    cfg = CloudflareTunnelDeployCfg.create(
        task_app_path=task_app_path,
        env_api_key=env_api_key,
        mode="quick",
        port=port,
        trace=False,
    )

    try:
        print(f"🚀 Deploying tunnel on port {port}...")
        url = await deploy_app_tunnel(cfg, env_file)
        print(f"✅ Tunnel ready: {url}")
        print(f"📝 URL written to: {env_file}")
        print("⏳ Keeping tunnel running... (Press Ctrl+C to stop)")

        # Keep the process alive until the tunnel process goes away.
        while True:
            proc = _TUNNEL_PROCESSES.get(port)
            if proc is not None and proc.poll() is not None:
                print(f"❌ Tunnel process exited with code {proc.returncode}")
                break
            # Yield to the event loop instead of blocking it: time.sleep()
            # inside a coroutine stalls every other task on the loop and
            # delays delivery of cancellation.
            await asyncio.sleep(1)
    except (KeyboardInterrupt, asyncio.CancelledError):
        # asyncio.run() on Python 3.11+ delivers Ctrl+C by cancelling the
        # main task, so the interrupt can arrive here as CancelledError
        # rather than KeyboardInterrupt. Both mean "shut down cleanly".
        print("\n🛑 Stopping tunnel...")
    finally:
        if port in _TUNNEL_PROCESSES:
            stop_tunnel(_TUNNEL_PROCESSES[port])
            _TUNNEL_PROCESSES.pop(port, None)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
if __name__ == "__main__":
|
|
59
|
+
asyncio.run(main())
|
|
60
|
+
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
#!/bin/bash
|
|
2
|
+
# Deploy Banking77 task app via Cloudflare Tunnel and run GEPA optimization
|
|
3
|
+
# This example demonstrates using Cloudflare Tunnel to expose a local task app
|
|
4
|
+
# to Synth's production backend for prompt optimization.
|
|
5
|
+
|
|
6
|
+
set -e
|
|
7
|
+
|
|
8
|
+
echo "🚀 Banking77 GEPA Optimization via Cloudflare Tunnel"
|
|
9
|
+
echo "====================================================="
|
|
10
|
+
echo ""
|
|
11
|
+
|
|
12
|
+
# Load environment variables from the first .env file found among the candidates below
|
|
13
|
+
# Candidate .env locations, in priority order: current directory, repo root
# (this script lives two levels below it), then the user's ~/.synth-ai dir.
ENV_FILES=(".env" "$(dirname "$0")/../../.env" "$HOME/.synth-ai/.env")
|
|
14
|
+
for env_file in "${ENV_FILES[@]}"; do
|
|
15
|
+
if [ -f "$env_file" ]; then
|
|
16
|
+
echo "📝 Loading environment from: $env_file"
|
|
17
|
+
set -a
|
|
18
|
+
source "$env_file"
|
|
19
|
+
set +a
|
|
20
|
+
break
|
|
21
|
+
fi
|
|
22
|
+
done
|
|
23
|
+
|
|
24
|
+
# Check required environment variables
|
|
25
|
+
if [ -z "$SYNTH_API_KEY" ]; then
|
|
26
|
+
echo "❌ ERROR: SYNTH_API_KEY not set"
|
|
27
|
+
echo " Get your API key from: https://app.usesynth.ai/api-keys"
|
|
28
|
+
exit 1
|
|
29
|
+
fi
|
|
30
|
+
|
|
31
|
+
if [ -z "$ENVIRONMENT_API_KEY" ]; then
|
|
32
|
+
echo "⚠️ ENVIRONMENT_API_KEY not set, generating one..."
|
|
33
|
+
export ENVIRONMENT_API_KEY=$(python3 -c 'import secrets; print(secrets.token_urlsafe(32))')
|
|
34
|
+
echo "✅ Generated ENVIRONMENT_API_KEY: ${ENVIRONMENT_API_KEY:0:20}..."
|
|
35
|
+
fi
|
|
36
|
+
|
|
37
|
+
if [ -z "$GROQ_API_KEY" ]; then
|
|
38
|
+
echo "⚠️ Warning: GROQ_API_KEY not set (needed for LLM-guided mutations in GEPA)"
|
|
39
|
+
echo " Set it with: export GROQ_API_KEY=your_key"
|
|
40
|
+
fi
|
|
41
|
+
|
|
42
|
+
# Use production backend
|
|
43
|
+
BACKEND_URL="${BACKEND_BASE_URL:-https://api.usesynth.ai}"
|
|
44
|
+
echo "📍 Backend URL: $BACKEND_URL"
|
|
45
|
+
echo "✅ SYNTH_API_KEY: ${SYNTH_API_KEY:0:20}..."
|
|
46
|
+
echo "✅ ENVIRONMENT_API_KEY: ${ENVIRONMENT_API_KEY:0:20}..."
|
|
47
|
+
echo ""
|
|
48
|
+
|
|
49
|
+
# Navigate to repo root (script is at examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh)
|
|
50
|
+
SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
|
|
51
|
+
REPO_ROOT="$(cd "$SCRIPT_DIR/../.." && pwd)"
|
|
52
|
+
cd "$REPO_ROOT"
|
|
53
|
+
|
|
54
|
+
# Check if cloudflared is installed
|
|
55
|
+
if ! command -v cloudflared &> /dev/null; then
|
|
56
|
+
echo "❌ ERROR: cloudflared not found"
|
|
57
|
+
echo " Install it:"
|
|
58
|
+
echo " macOS: brew install cloudflare/cloudflare/cloudflared"
|
|
59
|
+
echo " Linux/Windows: https://developers.cloudflare.com/cloudflare-one/networks/connectors/cloudflare-tunnel/do-more-with-tunnels/local-management/create-local-tunnel/"
|
|
60
|
+
exit 1
|
|
61
|
+
fi
|
|
62
|
+
|
|
63
|
+
echo "✅ cloudflared found: $(which cloudflared)"
|
|
64
|
+
echo ""
|
|
65
|
+
|
|
66
|
+
# Deploy task app via Cloudflare Tunnel
|
|
67
|
+
echo "🌐 Deploying Banking77 task app via Cloudflare Tunnel..."
|
|
68
|
+
echo ""
|
|
69
|
+
|
|
70
|
+
TASK_APP_PATH="$REPO_ROOT/examples/task_apps/banking77/banking77_task_app.py"
|
|
71
|
+
ENV_FILE="$REPO_ROOT/.env.tunnel"
|
|
72
|
+
|
|
73
|
+
if [ ! -f "$TASK_APP_PATH" ]; then
|
|
74
|
+
echo "❌ ERROR: Task app not found: $TASK_APP_PATH"
|
|
75
|
+
exit 1
|
|
76
|
+
fi
|
|
77
|
+
echo "✅ Task app found: $TASK_APP_PATH"
|
|
78
|
+
echo ""
|
|
79
|
+
|
|
80
|
+
# Create .env file if it doesn't exist (deploy command requires it to exist)
|
|
81
|
+
touch "$ENV_FILE"
|
|
82
|
+
|
|
83
|
+
# Deploy with quick tunnel (free, ephemeral) in background mode
|
|
84
|
+
# The deploy command returns immediately, keeping tunnel running headlessly
|
|
85
|
+
echo "🚀 Starting tunnel deployment (background mode)..."
|
|
86
|
+
uv run synth-ai deploy \
|
|
87
|
+
--task-app "$TASK_APP_PATH" \
|
|
88
|
+
--runtime tunnel \
|
|
89
|
+
--tunnel-mode quick \
|
|
90
|
+
--port 8102 \
|
|
91
|
+
--env "$ENV_FILE" \
|
|
92
|
+
--trace > /tmp/tunnel_deploy.log 2>&1 &
|
|
93
|
+
DEPLOY_PID=$!
|
|
94
|
+
|
|
95
|
+
# Wait for tunnel URL to be written to .env file
|
|
96
|
+
echo "⏳ Waiting for tunnel to be ready..."
|
|
97
|
+
TASK_APP_URL=""
|
|
98
|
+
for i in {1..30}; do
|
|
99
|
+
if [ -f "$ENV_FILE" ] && grep -q "^TASK_APP_URL=" "$ENV_FILE"; then
|
|
100
|
+
TASK_APP_URL=$(grep "^TASK_APP_URL=" "$ENV_FILE" | cut -d'=' -f2 | tr -d '"' | tr -d "'")
|
|
101
|
+
if [ -n "$TASK_APP_URL" ]; then
|
|
102
|
+
echo "✅ Tunnel URL found: $TASK_APP_URL"
|
|
103
|
+
# Wait a bit more for DNS propagation and tunnel to be fully ready
|
|
104
|
+
echo "⏳ Waiting for tunnel to be accessible..."
|
|
105
|
+
for j in {1..15}; do
|
|
106
|
+
if curl -s -f -H "X-API-Key: $ENVIRONMENT_API_KEY" "$TASK_APP_URL/health" > /dev/null 2>&1; then
|
|
107
|
+
echo "✅ Tunnel is accessible!"
|
|
108
|
+
break
|
|
109
|
+
fi
|
|
110
|
+
if [ $j -eq 15 ]; then
|
|
111
|
+
echo "⚠️ Tunnel not yet accessible, but continuing..."
|
|
112
|
+
fi
|
|
113
|
+
sleep 2
|
|
114
|
+
done
|
|
115
|
+
break
|
|
116
|
+
fi
|
|
117
|
+
fi
|
|
118
|
+
sleep 1
|
|
119
|
+
done
|
|
120
|
+
|
|
121
|
+
if [ -z "$TASK_APP_URL" ]; then
|
|
122
|
+
echo "❌ ERROR: Tunnel deployment failed or timed out"
|
|
123
|
+
echo " Check deployment logs: /tmp/tunnel_deploy.log"
|
|
124
|
+
kill $DEPLOY_PID 2>/dev/null || true
|
|
125
|
+
exit 1
|
|
126
|
+
fi
|
|
127
|
+
|
|
128
|
+
echo ""
|
|
129
|
+
echo "✅ Tunnel deployed: $TASK_APP_URL"
|
|
130
|
+
echo " Credentials saved to: $ENV_FILE"
|
|
131
|
+
echo " Tunnel process PID: $DEPLOY_PID (running in background)"
|
|
132
|
+
|
|
133
|
+
# Verify backend is accessible
|
|
134
|
+
echo ""
|
|
135
|
+
echo "🔍 Verifying backend connection..."
|
|
136
|
+
if curl -s -f "$BACKEND_URL/api/v1/health" > /dev/null 2>&1; then
|
|
137
|
+
echo "✅ Backend is accessible"
|
|
138
|
+
else
|
|
139
|
+
echo "⚠️ Warning: Cannot connect to backend at $BACKEND_URL"
|
|
140
|
+
echo " Continuing anyway..."
|
|
141
|
+
fi
|
|
142
|
+
|
|
143
|
+
# Create GEPA config with tunnel URL
|
|
144
|
+
CONFIG_DIR="$REPO_ROOT/examples/tunnel_gepa_banking77"
|
|
145
|
+
mkdir -p "$CONFIG_DIR"
|
|
146
|
+
CONFIG_FILE="$CONFIG_DIR/banking77_gepa_tunnel.toml"
|
|
147
|
+
|
|
148
|
+
echo ""
|
|
149
|
+
echo "📝 Creating GEPA config: $CONFIG_FILE"
|
|
150
|
+
# Copy the base config from existing Banking77 GEPA example and update task_app_url
|
|
151
|
+
BASE_CONFIG="$REPO_ROOT/examples/blog_posts/gepa/configs/banking77_gepa_local.toml"
|
|
152
|
+
if [ -f "$BASE_CONFIG" ]; then
|
|
153
|
+
# Copy base config and update task_app_url
|
|
154
|
+
cp "$BASE_CONFIG" "$CONFIG_FILE"
|
|
155
|
+
# Update task_app_url using sed (works on macOS and Linux)
|
|
156
|
+
if [[ "$OSTYPE" == "darwin"* ]]; then
|
|
157
|
+
sed -i '' "s|task_app_url = \".*\"|task_app_url = \"$TASK_APP_URL\"|" "$CONFIG_FILE"
|
|
158
|
+
else
|
|
159
|
+
sed -i "s|task_app_url = \".*\"|task_app_url = \"$TASK_APP_URL\"|" "$CONFIG_FILE"
|
|
160
|
+
fi
|
|
161
|
+
echo "✅ Config created from: $BASE_CONFIG"
|
|
162
|
+
echo " Updated task_app_url to: $TASK_APP_URL"
|
|
163
|
+
else
|
|
164
|
+
echo "❌ ERROR: Base config not found: $BASE_CONFIG"
|
|
165
|
+
echo " Please ensure the Banking77 GEPA example config exists"
|
|
166
|
+
exit 1
|
|
167
|
+
fi
|
|
168
|
+
|
|
169
|
+
# Run GEPA optimization
|
|
170
|
+
echo "🎯 Starting GEPA prompt optimization..."
|
|
171
|
+
echo " Config: $CONFIG_FILE"
|
|
172
|
+
echo " Backend: $BACKEND_URL"
|
|
173
|
+
echo " Task App: $TASK_APP_URL"
|
|
174
|
+
echo ""
|
|
175
|
+
echo "⚠️ Note: Keep the tunnel process running in another terminal"
|
|
176
|
+
echo " The tunnel will close when you stop the deployment process"
|
|
177
|
+
echo ""
|
|
178
|
+
|
|
179
|
+
export BACKEND_BASE_URL="$BACKEND_URL"
|
|
180
|
+
export SYNTH_BASE_URL="$BACKEND_URL"
|
|
181
|
+
|
|
182
|
+
# Run GEPA optimization
|
|
183
|
+
# The tunnel is already running (uvicorn in background thread, cloudflared process)
|
|
184
|
+
# We'll run GEPA training which will submit the job to the backend
|
|
185
|
+
echo ""
|
|
186
|
+
echo "🚀 Starting GEPA prompt optimization..."
|
|
187
|
+
echo " Config: $CONFIG_FILE"
|
|
188
|
+
echo " Backend: $BACKEND_URL"
|
|
189
|
+
echo " Task App: $TASK_APP_URL"
|
|
190
|
+
echo ""
|
|
191
|
+
echo "⚠️ Note: The tunnel process is running in the background."
|
|
192
|
+
echo " Keep this terminal open until training completes."
|
|
193
|
+
echo ""
|
|
194
|
+
|
|
195
|
+
export BACKEND_BASE_URL="$BACKEND_URL"
|
|
196
|
+
export SYNTH_BASE_URL="$BACKEND_URL"
|
|
197
|
+
|
|
198
|
+
# Run GEPA training
|
|
199
|
+
# Use --env-file to skip the interactive prompt
|
|
200
|
+
uv run synth-ai train \
|
|
201
|
+
--type prompt_learning \
|
|
202
|
+
--config "$CONFIG_FILE" \
|
|
203
|
+
--backend "$BACKEND_URL" \
|
|
204
|
+
--env-file "$ENV_FILE" \
|
|
205
|
+
--poll
|
|
206
|
+
|
|
207
|
+
echo ""
|
|
208
|
+
echo "✅ GEPA optimization complete!"
|
|
209
|
+
echo ""
|
|
210
|
+
|
|
211
|
+
# Cleanup: stop tunnel processes
|
|
212
|
+
echo "🧹 Cleaning up tunnel processes..."
|
|
213
|
+
kill $DEPLOY_PID 2>/dev/null || true
|
|
214
|
+
pkill -f "cloudflared.*8102" 2>/dev/null || true
|
|
215
|
+
pkill -f "uvicorn.*8102" 2>/dev/null || true
|
|
216
|
+
sleep 1
|
|
217
|
+
|
|
218
|
+
echo ""
|
|
219
|
+
echo "📊 Results:"
|
|
220
|
+
echo " - Config: $CONFIG_FILE"
|
|
221
|
+
echo " - Tunnel URL: $TASK_APP_URL"
|
|
222
|
+
echo " - Credentials: $ENV_FILE"
|
|
223
|
+
echo ""
|
|
224
|
+
echo "💡 To view results, check the job status in the Synth dashboard:"
|
|
225
|
+
echo " https://app.usesynth.ai"
|
|
226
|
+
|
examples/vlm/PROPOSAL.md
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# Crafter VLM Fine-Tuning Plan
|
|
2
|
+
|
|
3
|
+
## Why
|
|
4
|
+
|
|
5
|
+
Crafter observations already expose 64×64 RGB frames, but until now the stack only
|
|
6
|
+
surfaced text summaries. The new multimodal trace format allows us to:
|
|
7
|
+
|
|
8
|
+
- Retrieve and persist `observation_image_base64` on every step.
|
|
9
|
+
- Thread data URLs through policy prompts so LLMs see the raw frame.
|
|
10
|
+
- Emit multimodal content parts inside LLM call records (and therefore SFT datasets).
|
|
11
|
+
- Capture the same information inside the session trace (`policy_user_prompt` entries).
|
|
12
|
+
|
|
13
|
+
This unlocks a supervised dataset for vision‑language models that treat the frame as a
|
|
14
|
+
first-class token alongside the textual state summary.
|
|
15
|
+
|
|
16
|
+
## What Changed
|
|
17
|
+
|
|
18
|
+
1. **Environment wrappers** now attach `observation_image_base64`, `..._data_url`,
|
|
19
|
+
and image metadata to every observation (initialise + step).
|
|
20
|
+
2. **Crafter policy** augments the user prompt with `{"type":"image_url"}` segments
|
|
21
|
+
while still emitting the textual summary for language-only models.
|
|
22
|
+
3. **Tracing** stores structured prompt content and serialises it to JSON so prompts
|
|
23
|
+
with images survive the round-trip to SQLite/Turso.
|
|
24
|
+
4. **Dataset exporter** preserves multimodal content, flags rows with images, and
|
|
25
|
+
carries this metadata through SFT JSONL.
|
|
26
|
+
5. **Utility scripts** under `examples/vlm/` make it easy to sanity check frames,
|
|
27
|
+
filter datasets, and spin up an image-aware training job.
|
|
28
|
+
|
|
29
|
+
## Proposed Workflow
|
|
30
|
+
|
|
31
|
+
1. **Collect rollouts** with tracing enabled (either via the task app or scripted
|
|
32
|
+
runs) to populate `traces/v3/synth_ai.db`.
|
|
33
|
+
2. **Export** using `export_trace_sft.py` — images are embedded automatically.
|
|
34
|
+
3. **Filter** to rows with user images (see `filter_image_rows.py`) and optionally
|
|
35
|
+
build validation splits.
|
|
36
|
+
4. **Upload** the JSONL via `run_fft_and_save.py` or the Synth CLI.
|
|
37
|
+
5. **Train** with a VLM-capable base model (e.g. `openai/gpt-4o-mini-2024-07-18`)
|
|
38
|
+
using `configs/crafter_vlm_gpt4o.toml`.
|
|
39
|
+
6. **Evaluate** the resulting checkpoint on Crafter tasks (reuse the evaluation
|
|
40
|
+
harness from `examples/warming_up_to_rl` but now with multimodal prompts).
|
|
41
|
+
|
|
42
|
+
## Open Questions / Future Work
|
|
43
|
+
|
|
44
|
+
- **Longer context**: investigate packing multiple sequential frames per turn
|
|
45
|
+
(e.g. last N frames) as either separate image parts or a stitched sprite sheet.
|
|
46
|
+
- **Reward shaping**: extend metadata so SFT rows carry frame-level reward deltas,
|
|
47
|
+
enabling hybrid BC + value regression objectives.
|
|
48
|
+
- **Assistant images**: currently most assistant messages are textual; we could
|
|
49
|
+
experiment with returning thumbnails explaining the plan.
|
|
50
|
+
- **Automated filtering**: add CLI helpers to keep only turns whose tool calls led
|
|
51
|
+
to high-reward outcomes (e.g. achievements).
|
|
52
|
+
- **Evaluation**: define a reference set of human-labelled “vision checkpoints”
|
|
53
|
+
(e.g. recognise nearby resource, detect threats) to quantify multimodal progress.
|
examples/vlm/README.md
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# Crafter VLM Pipeline
|
|
2
|
+
|
|
3
|
+
This folder captures the reference workflow for fine-tuning Crafter policies with
|
|
4
|
+
multimodal (text + image) prompts. It stitches together the new image-aware tracing
|
|
5
|
+
plumbing with lightweight utilities for dataset curation and training.
|
|
6
|
+
|
|
7
|
+
## Quick Start
|
|
8
|
+
|
|
9
|
+
1. **Verify image capture**
|
|
10
|
+
```
|
|
11
|
+
uv run python examples/vlm/crafter_image_only_agent.py --seed 7 --steps 5
|
|
12
|
+
```
|
|
13
|
+
This writes PNG frames to `examples/vlm/output/frames/` and produces a JSONL preview
|
|
14
|
+
of OpenAI-style image-only user messages.
|
|
15
|
+
|
|
16
|
+
2. **Collect traced rollouts**
|
|
17
|
+
Use the Crafter task app (or your existing pipeline) with tracing enabled. The new
|
|
18
|
+
tracing schema automatically records `observation_image_base64` and stores image parts
|
|
19
|
+
in LM call records.
|
|
20
|
+
|
|
21
|
+
3. **Export multimodal SFT rows**
|
|
22
|
+
```
|
|
23
|
+
uv run python examples/warming_up_to_rl/export_trace_sft.py \
|
|
24
|
+
--db traces/v3/task_app_traces_<timestamp>.db \
|
|
25
|
+
--output examples/vlm/output/crafter_sft_full.jsonl
|
|
26
|
+
```
|
|
27
|
+
The exporter now emits `metadata.has_image`, `metadata.user_has_image`, and
|
|
28
|
+
`metadata.assistant_has_image` flags per turn.
|
|
29
|
+
|
|
30
|
+
4. **Filter to image-rich turns**
|
|
31
|
+
```
|
|
32
|
+
uv run python examples/vlm/filter_image_rows.py \
|
|
33
|
+
--input examples/vlm/output/crafter_sft_full.jsonl \
|
|
34
|
+
--output examples/vlm/output/crafter_vlm_dataset.jsonl
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
5. **(Optional) Split validation or augment**, then upload using the standard CLI:
|
|
38
|
+
```
|
|
39
|
+
uv run python examples/warming_up_to_rl/run_fft_and_save.py \
|
|
40
|
+
--toml examples/vlm/configs/crafter_vlm_gpt4o.toml \
|
|
41
|
+
--data examples/vlm/output/crafter_vlm_dataset.jsonl
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
## Config & Utilities
|
|
45
|
+
|
|
46
|
+
| File | Purpose |
|
|
47
|
+
| --- | --- |
|
|
48
|
+
| `configs/crafter_vlm_gpt4o.toml` | Sample Synth job targeting an image-capable model (`openai/gpt-4o-mini-2024-07-18`). Set `job.data` or pass `--data` explicitly. |
|
|
49
|
+
| `crafter_image_only_agent.py` | Captures frames and builds image-only prompts for sanity checks. |
|
|
50
|
+
| `filter_image_rows.py` | Extracts rows with image parts from exported JSONL datasets. |
|
|
51
|
+
|
|
52
|
+
## Notes & Next Steps
|
|
53
|
+
|
|
54
|
+
- The training config assumes full fine-tuning (`variety = "fft"`, `mode = "sft_offline"`). Adjust the
|
|
55
|
+
model id, hardware, or hyperparameters to match available infrastructure.
|
|
56
|
+
- Dataset rows emitted by `export_trace_sft.py` already contain OpenAI multimodal
|
|
57
|
+
content parts like:
|
|
58
|
+
```json
|
|
59
|
+
{
|
|
60
|
+
"role": "user",
|
|
61
|
+
"content": [
|
|
62
|
+
{"type": "text", "text": "..."},
|
|
63
|
+
{"type": "image_url", "image_url": {"url": "data:image/png;base64,..." }}
|
|
64
|
+
]
|
|
65
|
+
}
|
|
66
|
+
```
|
|
67
|
+
- See `PROPOSAL.md` for a deeper dive into outstanding work (longer rollouts,
|
|
68
|
+
richer multimodal augmentations, evaluation ideas).
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
[algorithm]
|
|
2
|
+
type = "offline"
|
|
3
|
+
method = "sft"
|
|
4
|
+
variety = "fft"
|
|
5
|
+
|
|
6
|
+
[job]
|
|
7
|
+
model = "openai/gpt-4o-mini-2024-07-18"
|
|
8
|
+
modalities = ["text", "image"]
|
|
9
|
+
# data = "examples/vlm/output/crafter_vlm_dataset.jsonl"
|
|
10
|
+
description = "Crafter VLM SFT (text + image prompts)"
|
|
11
|
+
|
|
12
|
+
[compute]
|
|
13
|
+
gpu_type = "A100"
|
|
14
|
+
gpu_count = 1
|
|
15
|
+
nodes = 1
|
|
16
|
+
|
|
17
|
+
[data]
|
|
18
|
+
topology = {}
|
|
19
|
+
# validation_path = "examples/vlm/output/crafter_vlm_dataset.val.jsonl"
|
|
20
|
+
|
|
21
|
+
[training]
|
|
22
|
+
mode = "sft_offline"
|
|
23
|
+
use_qlora = false
|
|
24
|
+
|
|
25
|
+
[training.validation]
|
|
26
|
+
enabled = true
|
|
27
|
+
evaluation_strategy = "steps"
|
|
28
|
+
eval_steps = 50
|
|
29
|
+
save_best_model_at_end = true
|
|
30
|
+
metric_for_best_model = "val.loss"
|
|
31
|
+
greater_is_better = false
|
|
32
|
+
|
|
33
|
+
[hyperparameters]
|
|
34
|
+
n_epochs = 1
|
|
35
|
+
train_kind = "fft"
|
|
36
|
+
per_device_batch = 1
|
|
37
|
+
gradient_accumulation_steps = 32
|
|
38
|
+
sequence_length = 4096
|
|
39
|
+
learning_rate = 1e-5
|
|
40
|
+
warmup_ratio = 0.03
|
|
41
|
+
weight_decay = 0.01
|
|
42
|
+
|
|
43
|
+
[hyperparameters.parallelism]
|
|
44
|
+
use_deepspeed = true
|
|
45
|
+
deepspeed_stage = 2
|
|
46
|
+
fsdp = false
|
|
47
|
+
bf16 = true
|
|
48
|
+
fp16 = false
|
|
49
|
+
activation_checkpointing = true
|