synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic.

Files changed (299)
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/baseline/banking77_baseline.py +204 -0
  3. examples/baseline/crafter_baseline.py +407 -0
  4. examples/baseline/pokemon_red_baseline.py +326 -0
  5. examples/baseline/simple_baseline.py +56 -0
  6. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  7. examples/blog_posts/gepa/README.md +355 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  9. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  10. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  13. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  15. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  16. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  18. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  19. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  20. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  21. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  22. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  23. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  24. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  25. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  26. examples/blog_posts/gepa/task_apps.py +105 -0
  27. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  28. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  29. examples/blog_posts/pokemon_vl/README.md +98 -0
  30. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  31. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  32. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  33. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  34. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  35. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  36. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  37. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  38. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  39. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  40. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  41. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  42. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  43. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  44. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  45. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  46. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  47. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  48. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  49. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  50. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  51. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  52. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  53. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  54. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  55. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  56. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  57. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  58. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  59. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  60. examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
  61. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  62. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
  63. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
  64. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  65. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  66. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  67. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  68. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  69. examples/qwen_vl/README.md +10 -12
  70. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  71. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  72. examples/qwen_vl/collect_data_via_cli.md +76 -84
  73. examples/qwen_vl/collect_vision_traces.py +4 -4
  74. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  75. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  76. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  77. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  78. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  79. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  80. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  81. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  82. examples/qwen_vl/run_vision_comparison.sh +6 -7
  83. examples/rl/README.md +5 -5
  84. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  85. examples/rl/configs/rl_from_base_qwen17.toml +6 -2
  86. examples/rl/task_app/README.md +1 -2
  87. examples/rl/task_app/math_single_step.py +2 -2
  88. examples/run_crafter_demo.sh +2 -2
  89. examples/sft/README.md +1 -1
  90. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  91. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  92. examples/swe/task_app/README.md +32 -2
  93. examples/swe/task_app/grpo_swe_mini.py +4 -0
  94. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  95. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  96. examples/swe/task_app/hosted/inference/openai_client.py +4 -38
  97. examples/swe/task_app/hosted/policy_routes.py +17 -0
  98. examples/swe/task_app/hosted/rollout.py +4 -2
  99. examples/swe/task_app/morph_backend.py +178 -0
  100. examples/task_apps/banking77/__init__.py +6 -0
  101. examples/task_apps/banking77/banking77_task_app.py +841 -0
  102. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  103. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  104. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  105. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  106. examples/task_apps/crafter/task_app/README.md +1 -1
  107. examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
  108. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  109. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  110. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  111. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  112. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
  113. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
  114. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
  115. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  116. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  117. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  118. examples/task_apps/gepa_benchmarks/common.py +260 -0
  119. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  120. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  121. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  122. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  123. examples/task_apps/math/README.md +1 -2
  124. examples/task_apps/pokemon_red/README.md +3 -4
  125. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  126. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  127. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  128. examples/task_apps/pokemon_red/task_app.py +288 -39
  129. examples/task_apps/sokoban/README.md +2 -3
  130. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  131. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  132. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  133. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  134. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  135. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
  136. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  137. examples/warming_up_to_rl/task_app/README.md +1 -1
  138. examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
  139. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  147. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  148. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
  149. synth_ai/api/train/builders.py +99 -4
  150. synth_ai/api/train/cli.py +516 -26
  151. synth_ai/api/train/config_finder.py +13 -2
  152. synth_ai/api/train/configs/__init__.py +23 -2
  153. synth_ai/api/train/configs/prompt_learning.py +442 -0
  154. synth_ai/api/train/configs/rl.py +61 -7
  155. synth_ai/api/train/configs/sft.py +6 -2
  156. synth_ai/api/train/configs/shared.py +59 -2
  157. synth_ai/api/train/task_app.py +1 -1
  158. synth_ai/api/train/validators.py +277 -0
  159. synth_ai/auth/credentials.py +119 -0
  160. synth_ai/baseline/__init__.py +25 -0
  161. synth_ai/baseline/config.py +209 -0
  162. synth_ai/baseline/discovery.py +214 -0
  163. synth_ai/baseline/execution.py +146 -0
  164. synth_ai/cli/__init__.py +94 -18
  165. synth_ai/cli/__main__.py +0 -0
  166. synth_ai/cli/claude.py +70 -0
  167. synth_ai/cli/codex.py +84 -0
  168. synth_ai/cli/commands/__init__.py +18 -0
  169. synth_ai/cli/commands/baseline/__init__.py +12 -0
  170. synth_ai/cli/commands/baseline/core.py +637 -0
  171. synth_ai/cli/commands/baseline/list.py +93 -0
  172. synth_ai/cli/commands/demo/__init__.py +6 -0
  173. synth_ai/cli/commands/demo/core.py +163 -0
  174. synth_ai/cli/commands/eval/__init__.py +19 -0
  175. synth_ai/cli/commands/eval/core.py +1112 -0
  176. synth_ai/cli/commands/eval/errors.py +81 -0
  177. synth_ai/cli/commands/eval/validation.py +133 -0
  178. synth_ai/cli/commands/filter/__init__.py +12 -0
  179. synth_ai/cli/commands/filter/core.py +424 -0
  180. synth_ai/cli/commands/filter/errors.py +55 -0
  181. synth_ai/cli/commands/filter/validation.py +77 -0
  182. synth_ai/cli/commands/help/__init__.py +177 -0
  183. synth_ai/cli/commands/help/core.py +72 -0
  184. synth_ai/cli/commands/smoke/__init__.py +7 -0
  185. synth_ai/cli/commands/smoke/core.py +1436 -0
  186. synth_ai/cli/commands/status/__init__.py +64 -0
  187. synth_ai/cli/commands/status/client.py +192 -0
  188. synth_ai/cli/commands/status/config.py +92 -0
  189. synth_ai/cli/commands/status/errors.py +20 -0
  190. synth_ai/cli/commands/status/formatters.py +164 -0
  191. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  192. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  193. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  194. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  195. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  196. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  197. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  198. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  199. synth_ai/cli/commands/status/utils.py +114 -0
  200. synth_ai/cli/commands/train/__init__.py +53 -0
  201. synth_ai/cli/commands/train/core.py +21 -0
  202. synth_ai/cli/commands/train/errors.py +117 -0
  203. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  204. synth_ai/cli/commands/train/judge_validation.py +305 -0
  205. synth_ai/cli/commands/train/validation.py +386 -0
  206. synth_ai/cli/demo.py +30 -158
  207. synth_ai/cli/deploy/__init__.py +43 -0
  208. synth_ai/cli/deploy.py +162 -0
  209. synth_ai/cli/eval/__init__.py +36 -0
  210. synth_ai/cli/eval/core.py +5 -0
  211. synth_ai/cli/eval/errors.py +31 -0
  212. synth_ai/cli/eval/validation.py +5 -0
  213. synth_ai/cli/filter/__init__.py +28 -0
  214. synth_ai/cli/filter/core.py +5 -0
  215. synth_ai/cli/filter/errors.py +23 -0
  216. synth_ai/cli/filter/validation.py +5 -0
  217. synth_ai/cli/legacy_root_backup.py +14 -8
  218. synth_ai/cli/modal_serve/__init__.py +12 -0
  219. synth_ai/cli/modal_serve/core.py +14 -0
  220. synth_ai/cli/modal_serve/errors.py +8 -0
  221. synth_ai/cli/modal_serve/validation.py +11 -0
  222. synth_ai/cli/opencode.py +107 -0
  223. synth_ai/cli/root.py +9 -5
  224. synth_ai/cli/serve/__init__.py +12 -0
  225. synth_ai/cli/serve/core.py +14 -0
  226. synth_ai/cli/serve/errors.py +8 -0
  227. synth_ai/cli/serve/validation.py +11 -0
  228. synth_ai/cli/setup.py +20 -265
  229. synth_ai/cli/status.py +7 -126
  230. synth_ai/cli/task_app_deploy.py +1 -10
  231. synth_ai/cli/task_app_modal_serve.py +4 -9
  232. synth_ai/cli/task_app_serve.py +4 -11
  233. synth_ai/cli/task_apps.py +51 -1480
  234. synth_ai/cli/train/__init__.py +12 -0
  235. synth_ai/cli/train/core.py +21 -0
  236. synth_ai/cli/train/errors.py +8 -0
  237. synth_ai/cli/train/validation.py +24 -0
  238. synth_ai/cli/train.py +1 -14
  239. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  240. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  241. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  242. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  243. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  244. synth_ai/environments/examples/red/engine.py +33 -12
  245. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  246. synth_ai/environments/examples/red/environment.py +26 -0
  247. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  248. synth_ai/http.py +12 -0
  249. synth_ai/judge_schemas.py +10 -10
  250. synth_ai/learning/__init__.py +10 -0
  251. synth_ai/learning/prompt_learning_client.py +276 -0
  252. synth_ai/learning/prompt_learning_types.py +184 -0
  253. synth_ai/learning/rl/client.py +3 -1
  254. synth_ai/pricing/__init__.py +2 -0
  255. synth_ai/pricing/model_pricing.py +57 -0
  256. synth_ai/streaming/__init__.py +29 -0
  257. synth_ai/streaming/config.py +94 -0
  258. synth_ai/streaming/handlers.py +518 -0
  259. synth_ai/streaming/streamer.py +320 -0
  260. synth_ai/streaming/types.py +95 -0
  261. synth_ai/task/apps/__init__.py +1 -0
  262. synth_ai/task/config.py +2 -0
  263. synth_ai/task/tracing_utils.py +25 -25
  264. synth_ai/task/validators.py +45 -9
  265. synth_ai/task_app_cfgs.py +21 -0
  266. synth_ai/tracing_v3/config.py +162 -19
  267. synth_ai/tracing_v3/constants.py +1 -1
  268. synth_ai/tracing_v3/db_config.py +24 -38
  269. synth_ai/tracing_v3/migration_helper.py +1 -2
  270. synth_ai/tracing_v3/storage/config.py +47 -13
  271. synth_ai/tracing_v3/storage/factory.py +3 -3
  272. synth_ai/tracing_v3/turso/daemon.py +113 -11
  273. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  274. synth_ai/types.py +8 -0
  275. synth_ai/urls.py +11 -0
  276. synth_ai/utils/__init__.py +30 -1
  277. synth_ai/utils/agents.py +74 -0
  278. synth_ai/utils/bin.py +39 -0
  279. synth_ai/utils/cli.py +149 -5
  280. synth_ai/utils/env.py +40 -33
  281. synth_ai/utils/http.py +4 -1
  282. synth_ai/utils/json.py +72 -0
  283. synth_ai/utils/modal.py +285 -3
  284. synth_ai/utils/paths.py +48 -0
  285. synth_ai/utils/uvicorn.py +113 -0
  286. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
  287. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
  288. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  289. synth_ai/cli/tui.py +0 -62
  290. synth_ai/tui/__init__.py +0 -5
  291. synth_ai/tui/__main__.py +0 -13
  292. synth_ai/tui/cli/__init__.py +0 -1
  293. synth_ai/tui/cli/query_experiments.py +0 -164
  294. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  295. synth_ai/tui/dashboard.py +0 -911
  296. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  297. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  298. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  299. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml
@@ -0,0 +1,53 @@
+ # MIPROv2 Prompt Learning for HotpotQA
+ # Local backend configuration targeting the HotpotQA task app.
+
+ [prompt_learning]
+ algorithm = "mipro"
+ task_app_url = "http://127.0.0.1:8110"
+ task_app_id = "hotpotqa"
+
+ # Seeds used during online optimisation
+ evaluation_seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+
+ # Held-out seeds for the final sweep
+ test_pool = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+
+ [prompt_learning.initial_prompt]
+ id = "hotpotqa_chain"
+ name = "HotpotQA Multi-Hop Reasoning"
+
+ [[prompt_learning.initial_prompt.messages]]
+ role = "system"
+ pattern = "You are a research assistant that answers multi-hop questions. Read the supporting passages carefully and articulate the final answer plus a short justification. Use the format:\nAnswer: ...\nSupport: ..."
+ order = 0
+
+ [[prompt_learning.initial_prompt.messages]]
+ role = "user"
+ pattern = "Question: {question}\n\nPassages:\n{context}\n\nProvide the final answer and cite the supporting facts."
+ order = 1
+
+ [prompt_learning.initial_prompt.wildcards]
+ question = "REQUIRED"
+ context = "REQUIRED"
+
+ [prompt_learning.policy]
+ model = "openai/gpt-oss-20b"
+ provider = "groq"
+ inference_url = "https://api.groq.com/openai/v1"
+ temperature = 0.0
+ max_completion_tokens = 512
+ policy_name = "hotpotqa-mipro"
+
+ [prompt_learning.mipro]
+ env_name = "hotpotqa"
+ num_iterations = 20
+ num_evaluations_per_iteration = 8
+ batch_size = 8
+ max_concurrent = 16
+ meta_model = "gpt-4.1-mini"
+ meta_model_provider = "openai"
+ meta_model_inference_url = "https://api.openai.com/v1"
+ few_shot_score_threshold = 0.75
+ test_pool = [20, 21, 22, 23, 24]
+ bootstrap_train_seeds = [0, 1, 2, 3, 4]
+ online_pool = [5, 6, 7, 8, 9]
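
These prompt-learning files are plain TOML, so they can be inspected outside the package before a run. A minimal sketch (not part of synth-ai) that loads the MIPRO config above and prints a few key fields; it assumes Python 3.11+ for tomllib and that the file sits at the path shown in the changed-files list:

import tomllib
from pathlib import Path

# Assumed path, taken from the changed-files list above.
config_path = Path("examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml")

with config_path.open("rb") as fh:
    cfg = tomllib.load(fh)

pl = cfg["prompt_learning"]
print("algorithm:", pl["algorithm"])                 # "mipro"
print("task app:", pl["task_app_url"], pl["task_app_id"])
print("policy model:", pl["policy"]["model"])        # "openai/gpt-oss-20b"
print("meta model:", pl["mipro"]["meta_model"])      # "gpt-4.1-mini"
print("eval seeds:", pl["evaluation_seeds"])
print("held-out pool:", pl["test_pool"])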
examples/blog_posts/gepa/configs/hover_gepa_local.toml
@@ -0,0 +1,59 @@
+ # GEPA Prompt Learning for HoVer
+ # Local backend configuration targeting the HoVer task app.
+
+ [prompt_learning]
+ algorithm = "gepa"
+ task_app_url = "http://127.0.0.1:8112"
+ task_app_id = "hover"
+
+ # Seeds for online evaluation during optimisation
+ evaluation_seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+
+ # Held-out seeds for final reporting
+ test_pool = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+
+ [prompt_learning.initial_prompt]
+ id = "hover_verification"
+ name = "HoVer Claim Verification"
+
+ [[prompt_learning.initial_prompt.messages]]
+ role = "system"
+ pattern = "You verify Wikipedia claims. For each example decide whether the claim is SUPPORTED, REFUTED, or INSUFFICIENT and cite the sentences that justify the label."
+ order = 0
+
+ [[prompt_learning.initial_prompt.messages]]
+ role = "user"
+ pattern = "Claim: {claim}\n\nEvidence:\n{evidence}\n\nRespond with the format:\nLabel: <SUPPORTED|REFUTED|INSUFFICIENT>\nRationale: <brief explanation citing evidence lines>."
+ order = 1
+
+ [prompt_learning.initial_prompt.wildcards]
+ claim = "REQUIRED"
+ evidence = "REQUIRED"
+
+ [prompt_learning.policy]
+ model = "openai/gpt-oss-20b"
+ provider = "groq"
+ inference_url = "https://api.groq.com/openai/v1"
+ temperature = 0.0
+ max_completion_tokens = 512
+ policy_name = "hover-gepa"
+
+ [prompt_learning.gepa]
+ env_name = "hover"
+ initial_population_size = 24
+ num_generations = 15
+ mutation_rate = 0.33
+ crossover_rate = 0.55
+ selection_pressure = 1.0
+ minibatch_size = 8
+ pareto_set_size = 24
+ feedback_fraction = 0.5
+ children_per_generation = 12
+ patience_generations = 5
+ rollout_budget = 540
+ archive_size = 36
+ pareto_eps = 1e-6
+ max_concurrent_rollouts = 20
+ mutation_llm_model = "openai/gpt-oss-20b"
+ mutation_llm_provider = "groq"
+ mutation_llm_inference_url = "https://api.groq.com/openai/v1"
examples/blog_posts/gepa/configs/hover_gepa_qwen.toml
@@ -0,0 +1,36 @@
+ [prompt_learning]
+ algorithm = "gepa"
+ task_app_url = "https://synth-laboratories-dev--synth-banking77-web-web.modal.run" # TODO: replace with HotpotQA task app URL
+ task_app_id = "hotpotqa"
+
+ # Seeds
+ evaluation_seeds = [0,1,2,3,4,5,6,7,8,9]
+
+ # Held-out validation
+ validation_seeds = [10,11,12,13,14,15,16,17,18,19]
+ validation_pool = "validation"
+ validation_top_k = 3
+
+ # Train split configuration
+ [prompt_learning.env_config]
+ pool = "train"
+
+ # Policy model (synth Qwen via backend inference proxy)
+ [prompt_learning.policy]
+ provider = "synth"
+ model = "Qwen/Qwen3-8B"
+ # inference_url will be mapped to backend /api/inference/v1 by the optimizer
+
+ # GEPA parameters (tune as needed)
+ [prompt_learning.gepa]
+ env_name = "hover"
+ initial_population_size = 24
+ num_generations = 6
+ children_per_generation = 12
+ minibatch_size = 10
+ pareto_set_size = 32
+ rollout_budget = 600
+ max_concurrent_rollouts = 16
+ mutation_llm_model = "openai/gpt-oss-120b"
+ mutation_llm_provider = "groq"
+ proposer_type = "dspy"
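
The "_local" and "_qwen" variants of the same benchmark share the [prompt_learning] layout but differ in policy and budget. A rough sketch (standard library only, not part of the package, file paths assumed from the changed-files list) that reports which GEPA keys differ between the two HoVer configs shown above:

import tomllib
from pathlib import Path

configs = Path("examples/blog_posts/gepa/configs")  # assumed location

def gepa_section(name: str) -> dict:
    # Return the [prompt_learning.gepa] table from one config file.
    with (configs / name).open("rb") as fh:
        return tomllib.load(fh)["prompt_learning"]["gepa"]

local = gepa_section("hover_gepa_local.toml")
qwen = gepa_section("hover_gepa_qwen.toml")

# Print every key whose value differs (or that only one file defines).
for key in sorted(local.keys() | qwen.keys()):
    if local.get(key) != qwen.get(key):
        print(f"{key}: local={local.get(key)!r} qwen={qwen.get(key)!r}")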
examples/blog_posts/gepa/configs/hover_mipro_local.toml
@@ -0,0 +1,53 @@
+ # MIPROv2 Prompt Learning for HoVer
+ # Local backend configuration targeting the HoVer task app.
+
+ [prompt_learning]
+ algorithm = "mipro"
+ task_app_url = "http://127.0.0.1:8112"
+ task_app_id = "hover"
+
+ # Seeds explored during optimisation
+ evaluation_seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+
+ # Held-out seeds used for the final sweep
+ test_pool = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+
+ [prompt_learning.initial_prompt]
+ id = "hover_verification"
+ name = "HoVer Claim Verification"
+
+ [[prompt_learning.initial_prompt.messages]]
+ role = "system"
+ pattern = "You are a fact-checking assistant. Review the evidence carefully and respond with SUPPORTED, REFUTED, or INSUFFICIENT along with a concise justification."
+ order = 0
+
+ [[prompt_learning.initial_prompt.messages]]
+ role = "user"
+ pattern = "Claim: {claim}\n\nEvidence:\n{evidence}\n\nRespond with the format:\nLabel: <SUPPORTED|REFUTED|INSUFFICIENT>\nRationale: <brief explanation>."
+ order = 1
+
+ [prompt_learning.initial_prompt.wildcards]
+ claim = "REQUIRED"
+ evidence = "REQUIRED"
+
+ [prompt_learning.policy]
+ model = "openai/gpt-oss-20b"
+ provider = "groq"
+ inference_url = "https://api.groq.com/openai/v1"
+ temperature = 0.0
+ max_completion_tokens = 512
+ policy_name = "hover-mipro"
+
+ [prompt_learning.mipro]
+ env_name = "hover"
+ num_iterations = 20
+ num_evaluations_per_iteration = 6
+ batch_size = 6
+ max_concurrent = 16
+ meta_model = "gpt-4.1-mini"
+ meta_model_provider = "openai"
+ meta_model_inference_url = "https://api.openai.com/v1"
+ few_shot_score_threshold = 0.8
+ test_pool = [20, 21, 22, 23, 24]
+ bootstrap_train_seeds = [0, 1, 2, 3, 4]
+ online_pool = [5, 6, 7, 8, 9]
examples/blog_posts/gepa/configs/ifbench_gepa_local.toml
@@ -0,0 +1,59 @@
+ # GEPA Prompt Learning for IFBench
+ # Local backend configuration targeting the IFBench task app.
+
+ [prompt_learning]
+ algorithm = "gepa"
+ task_app_url = "http://127.0.0.1:8111"
+ task_app_id = "ifbench"
+
+ # Candidate evaluation seeds during optimisation
+ evaluation_seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+
+ # Held-out pool used for the final comparison sweep
+ test_pool = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+
+ [prompt_learning.initial_prompt]
+ id = "ifbench_following"
+ name = "IFBench Instruction Following"
+
+ [[prompt_learning.initial_prompt.messages]]
+ role = "system"
+ pattern = "You are an obedient assistant that must follow instructions exactly. Ensure that every requirement is satisfied, avoid unsolicited commentary, and be explicit when information is missing."
+ order = 0
+
+ [[prompt_learning.initial_prompt.messages]]
+ role = "user"
+ pattern = "Instruction: {instruction}\n\nInput: {input}\n\nProvide the response that best follows the instruction."
+ order = 1
+
+ [prompt_learning.initial_prompt.wildcards]
+ instruction = "REQUIRED"
+ input = "OPTIONAL"
+
+ [prompt_learning.policy]
+ model = "openai/gpt-oss-20b"
+ provider = "groq"
+ inference_url = "https://api.groq.com/openai/v1"
+ temperature = 0.0
+ max_completion_tokens = 512
+ policy_name = "ifbench-gepa"
+
+ [prompt_learning.gepa]
+ env_name = "ifbench"
+ initial_population_size = 24
+ num_generations = 12
+ mutation_rate = 0.3
+ crossover_rate = 0.6
+ selection_pressure = 1.0
+ minibatch_size = 8
+ pareto_set_size = 24
+ feedback_fraction = 0.5
+ children_per_generation = 12
+ patience_generations = 4
+ rollout_budget = 480
+ archive_size = 32
+ pareto_eps = 1e-6
+ max_concurrent_rollouts = 20
+ mutation_llm_model = "openai/gpt-oss-20b"
+ mutation_llm_provider = "groq"
+ mutation_llm_inference_url = "https://api.groq.com/openai/v1"
examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml
@@ -0,0 +1,36 @@
+ [prompt_learning]
+ algorithm = "gepa"
+ task_app_url = "https://synth-laboratories-dev--synth-banking77-web-web.modal.run" # TODO: replace with HotpotQA task app URL
+ task_app_id = "hotpotqa"
+
+ # Seeds
+ evaluation_seeds = [0,1,2,3,4,5,6,7,8,9]
+
+ # Held-out validation
+ validation_seeds = [10,11,12,13,14,15,16,17,18,19]
+ validation_pool = "validation"
+ validation_top_k = 3
+
+ # Train split configuration
+ [prompt_learning.env_config]
+ pool = "train"
+
+ # Policy model (synth Qwen via backend inference proxy)
+ [prompt_learning.policy]
+ provider = "synth"
+ model = "Qwen/Qwen3-8B"
+ # inference_url will be mapped to backend /api/inference/v1 by the optimizer
+
+ # GEPA parameters (tune as needed)
+ [prompt_learning.gepa]
+ env_name = "ifbench"
+ initial_population_size = 24
+ num_generations = 6
+ children_per_generation = 12
+ minibatch_size = 10
+ pareto_set_size = 32
+ rollout_budget = 600
+ max_concurrent_rollouts = 16
+ mutation_llm_model = "openai/gpt-oss-120b"
+ mutation_llm_provider = "groq"
+ proposer_type = "dspy"
examples/blog_posts/gepa/configs/ifbench_mipro_local.toml
@@ -0,0 +1,53 @@
+ # MIPROv2 Prompt Learning for IFBench
+ # Local backend configuration targeting the IFBench task app.
+
+ [prompt_learning]
+ algorithm = "mipro"
+ task_app_url = "http://127.0.0.1:8111"
+ task_app_id = "ifbench"
+
+ # Seeds evaluated during optimisation
+ evaluation_seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+
+ # Held-out seeds for the final comparison
+ test_pool = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+
+ [prompt_learning.initial_prompt]
+ id = "ifbench_following"
+ name = "IFBench Instruction Following"
+
+ [[prompt_learning.initial_prompt.messages]]
+ role = "system"
+ pattern = "You are an obedient assistant that follows instructions exactly. Ensure that every constraint is satisfied and mention explicitly if something cannot be completed."
+ order = 0
+
+ [[prompt_learning.initial_prompt.messages]]
+ role = "user"
+ pattern = "Instruction: {instruction}\n\nInput: {input}\n\nReturn the response that best follows the instruction."
+ order = 1
+
+ [prompt_learning.initial_prompt.wildcards]
+ instruction = "REQUIRED"
+ input = "OPTIONAL"
+
+ [prompt_learning.policy]
+ model = "openai/gpt-oss-20b"
+ provider = "groq"
+ inference_url = "https://api.groq.com/openai/v1"
+ temperature = 0.0
+ max_completion_tokens = 512
+ policy_name = "ifbench-mipro"
+
+ [prompt_learning.mipro]
+ env_name = "ifbench"
+ num_iterations = 16
+ num_evaluations_per_iteration = 6
+ batch_size = 6
+ max_concurrent = 16
+ meta_model = "gpt-4.1-mini"
+ meta_model_provider = "openai"
+ meta_model_inference_url = "https://api.openai.com/v1"
+ few_shot_score_threshold = 0.8
+ test_pool = [20, 21, 22, 23, 24]
+ bootstrap_train_seeds = [0, 1, 2, 3, 4]
+ online_pool = [5, 6, 7, 8, 9]
examples/blog_posts/gepa/configs/pupa_gepa_local.toml
@@ -0,0 +1,60 @@
+ # GEPA Prompt Learning for PUPA
+ # Local backend configuration targeting the PUPA privacy-aware delegation task app.
+
+ [prompt_learning]
+ algorithm = "gepa"
+ task_app_url = "http://127.0.0.1:8113"
+ task_app_id = "pupa"
+
+ # Seeds explored during optimisation
+ evaluation_seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+
+ # Held-out seeds for the final evaluation
+ test_pool = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+
+ [prompt_learning.initial_prompt]
+ id = "pupa_privacy"
+ name = "PUPA Privacy-Constrained Delegation"
+
+ [[prompt_learning.initial_prompt.messages]]
+ role = "system"
+ pattern = "You are an assistant that must complete the task while honouring every privacy rule. Never reveal disallowed fields, always justify decisions, and explicitly state when data cannot be shared."
+ order = 0
+
+ [[prompt_learning.initial_prompt.messages]]
+ role = "user"
+ pattern = "Task: {task}\n\nPrivacy Policy: {policy}\n\nRecords:\n{records}\n\nProduce the delegated plan or response while respecting the privacy policy."
+ order = 1
+
+ [prompt_learning.initial_prompt.wildcards]
+ task = "REQUIRED"
+ policy = "REQUIRED"
+ records = "REQUIRED"
+
+ [prompt_learning.policy]
+ model = "openai/gpt-oss-20b"
+ provider = "groq"
+ inference_url = "https://api.groq.com/openai/v1"
+ temperature = 0.0
+ max_completion_tokens = 512
+ policy_name = "pupa-gepa"
+
+ [prompt_learning.gepa]
+ env_name = "pupa"
+ initial_population_size = 24
+ num_generations = 15
+ mutation_rate = 0.3
+ crossover_rate = 0.6
+ selection_pressure = 1.0
+ minibatch_size = 8
+ pareto_set_size = 24
+ feedback_fraction = 0.6
+ children_per_generation = 12
+ patience_generations = 5
+ rollout_budget = 540
+ archive_size = 36
+ pareto_eps = 1e-6
+ max_concurrent_rollouts = 20
+ mutation_llm_model = "openai/gpt-oss-20b"
+ mutation_llm_provider = "groq"
+ mutation_llm_inference_url = "https://api.groq.com/openai/v1"
examples/blog_posts/gepa/configs/pupa_mipro_local.toml
@@ -0,0 +1,54 @@
+ # MIPROv2 Prompt Learning for PUPA
+ # Local backend configuration targeting the PUPA privacy-aware delegation task app.
+
+ [prompt_learning]
+ algorithm = "mipro"
+ task_app_url = "http://127.0.0.1:8113"
+ task_app_id = "pupa"
+
+ # Seeds evaluated during optimisation
+ evaluation_seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
+
+ # Held-out seeds for the final sweep
+ test_pool = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
+
+ [prompt_learning.initial_prompt]
+ id = "pupa_privacy"
+ name = "PUPA Privacy-Constrained Delegation"
+
+ [[prompt_learning.initial_prompt.messages]]
+ role = "system"
+ pattern = "You are an assistant that must complete tasks without violating the privacy policy. Redact any forbidden attributes and justify refusals."
+ order = 0
+
+ [[prompt_learning.initial_prompt.messages]]
+ role = "user"
+ pattern = "Task: {task}\n\nPrivacy Policy: {policy}\n\nRecords:\n{records}\n\nProvide the delegated plan or answer, ensuring compliance with the privacy policy."
+ order = 1
+
+ [prompt_learning.initial_prompt.wildcards]
+ task = "REQUIRED"
+ policy = "REQUIRED"
+ records = "REQUIRED"
+
+ [prompt_learning.policy]
+ model = "openai/gpt-oss-20b"
+ provider = "groq"
+ inference_url = "https://api.groq.com/openai/v1"
+ temperature = 0.0
+ max_completion_tokens = 512
+ policy_name = "pupa-mipro"
+
+ [prompt_learning.mipro]
+ env_name = "pupa"
+ num_iterations = 20
+ num_evaluations_per_iteration = 6
+ batch_size = 6
+ max_concurrent = 16
+ meta_model = "gpt-4.1-mini"
+ meta_model_provider = "openai"
+ meta_model_inference_url = "https://api.openai.com/v1"
+ few_shot_score_threshold = 0.85
+ test_pool = [20, 21, 22, 23, 24]
+ bootstrap_train_seeds = [0, 1, 2, 3, 4]
+ online_pool = [5, 6, 7, 8, 9]
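
Each of these configs uses {placeholders} in the message patterns and declares the same names under [prompt_learning.initial_prompt.wildcards]. A small sketch (standard library only, not from the package, paths assumed from the changed-files list) that cross-checks the two for the PUPA files shown above:

import tomllib
from pathlib import Path
from string import Formatter

def check_wildcards(path: Path) -> None:
    # Compare {placeholders} used in message patterns against declared wildcards.
    with path.open("rb") as fh:
        prompt = tomllib.load(fh)["prompt_learning"]["initial_prompt"]
    declared = set(prompt.get("wildcards", {}))
    used = {
        field
        for msg in prompt.get("messages", [])
        for _, field, _, _ in Formatter().parse(msg["pattern"])
        if field
    }
    missing = used - declared
    print(f"{path.name}: used={sorted(used)} declared={sorted(declared)} missing={sorted(missing) or 'none'}")

# Example over the local PUPA configs (assumed directory).
for name in ("pupa_gepa_local.toml", "pupa_mipro_local.toml"):
    check_wildcards(Path("examples/blog_posts/gepa/configs") / name)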
examples/blog_posts/gepa/deploy_banking77_task_app.sh
@@ -0,0 +1,41 @@
+ #!/bin/bash
+ # Deploy Banking77 task app locally for GEPA optimization
+
+ set -e
+
+ echo "🚀 Deploying Banking77 Task App..."
+ echo "=================================="
+
+ # Set up environment variables
+ export ENVIRONMENT_API_KEY="${ENVIRONMENT_API_KEY:-$(python -c 'import secrets; print(secrets.token_urlsafe(32))')}"
+ export GROQ_API_KEY="${GROQ_API_KEY}"
+
+ # Check for required env vars
+ if [ -z "$GROQ_API_KEY" ]; then
+ echo "❌ Error: GROQ_API_KEY not set"
+ echo "Please set it: export GROQ_API_KEY=your_key"
+ exit 1
+ fi
+
+ echo "✅ ENVIRONMENT_API_KEY: ${ENVIRONMENT_API_KEY:0:20}..."
+ echo "✅ GROQ_API_KEY: ${GROQ_API_KEY:0:20}..."
+
+ # Navigate to repo root
+ cd "$(dirname "$0")/../../.."
+
+ echo ""
+ echo "📦 Installing dependencies..."
+ uv pip install -e . --quiet || true
+
+ echo ""
+ echo "🏃 Starting Banking77 task app on http://127.0.0.1:8102"
+ echo "Press Ctrl+C to stop"
+ echo ""
+
+ # Run the task app
+ python -m examples.task_apps.banking77.banking77_task_app \
+ --host 0.0.0.0 \
+ --port 8102 \
+ --reload
+
+
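
The deploy script leaves the Banking77 task app listening on port 8102. Before pointing a GEPA run at it, a quick way to confirm the port is accepting connections is a plain socket probe; this sketch is not part of the package and assumes only that the app is bound locally on port 8102 as in the script above:

import socket

# Host and port used by deploy_banking77_task_app.sh above.
HOST, PORT = "127.0.0.1", 8102

try:
    # Attempt a TCP connection with a short timeout.
    with socket.create_connection((HOST, PORT), timeout=2.0):
        print(f"Task app is accepting connections on {HOST}:{PORT}")
except OSError as exc:
    print(f"Task app not reachable on {HOST}:{PORT}: {exc}")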