synth-ai 0.2.9.dev11__py3-none-any.whl → 0.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic; consult the package registry's advisory page for more details.

Files changed (909)
  1. synth_ai/__init__.py +44 -45
  2. synth_ai/__main__.py +30 -3
  3. synth_ai/cli/__init__.py +104 -78
  4. synth_ai/cli/__main__.py +42 -0
  5. synth_ai/cli/_internal/__init__.py +5 -0
  6. synth_ai/cli/_internal/modal_wrapper.py +31 -0
  7. synth_ai/cli/_internal/storage.py +20 -0
  8. synth_ai/cli/_internal/typer_patch.py +47 -0
  9. synth_ai/cli/_internal/validate_task_app.py +29 -0
  10. synth_ai/cli/agents/__init__.py +17 -0
  11. synth_ai/cli/agents/claude.py +77 -0
  12. synth_ai/cli/agents/codex.py +265 -0
  13. synth_ai/cli/agents/opencode.py +253 -0
  14. synth_ai/cli/commands/__init__.py +18 -0
  15. synth_ai/cli/commands/artifacts/__init__.py +13 -0
  16. synth_ai/cli/commands/artifacts/client.py +119 -0
  17. synth_ai/cli/commands/artifacts/config.py +57 -0
  18. synth_ai/cli/commands/artifacts/core.py +24 -0
  19. synth_ai/cli/commands/artifacts/download.py +188 -0
  20. synth_ai/cli/commands/artifacts/export.py +186 -0
  21. synth_ai/cli/commands/artifacts/list.py +156 -0
  22. synth_ai/cli/commands/artifacts/parsing.py +250 -0
  23. synth_ai/cli/commands/artifacts/show.py +336 -0
  24. synth_ai/cli/commands/baseline/__init__.py +12 -0
  25. synth_ai/cli/commands/baseline/core.py +636 -0
  26. synth_ai/cli/commands/baseline/list.py +94 -0
  27. synth_ai/cli/commands/demo/__init__.py +3 -0
  28. synth_ai/cli/commands/demo/core.py +153 -0
  29. synth_ai/cli/commands/eval/__init__.py +19 -0
  30. synth_ai/cli/commands/eval/core.py +1113 -0
  31. synth_ai/cli/commands/eval/errors.py +81 -0
  32. synth_ai/cli/commands/eval/validation.py +133 -0
  33. synth_ai/cli/commands/filter/__init__.py +12 -0
  34. synth_ai/cli/commands/filter/core.py +424 -0
  35. synth_ai/cli/commands/filter/errors.py +55 -0
  36. synth_ai/cli/commands/filter/validation.py +77 -0
  37. synth_ai/cli/commands/help/__init__.py +185 -0
  38. synth_ai/cli/commands/help/core.py +72 -0
  39. synth_ai/cli/commands/scan/__init__.py +19 -0
  40. synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
  41. synth_ai/cli/commands/scan/core.py +344 -0
  42. synth_ai/cli/commands/scan/health_checker.py +242 -0
  43. synth_ai/cli/commands/scan/local_scanner.py +278 -0
  44. synth_ai/cli/commands/scan/models.py +83 -0
  45. synth_ai/cli/commands/smoke/__init__.py +7 -0
  46. synth_ai/cli/commands/smoke/core.py +1438 -0
  47. synth_ai/cli/commands/status/__init__.py +66 -0
  48. synth_ai/cli/commands/status/client.py +192 -0
  49. synth_ai/cli/commands/status/config.py +92 -0
  50. synth_ai/cli/commands/status/errors.py +20 -0
  51. synth_ai/cli/commands/status/formatters.py +164 -0
  52. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  53. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  54. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  55. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  56. synth_ai/cli/commands/status/subcommands/pricing.py +23 -0
  57. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  58. synth_ai/cli/commands/status/subcommands/session.py +182 -0
  59. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  60. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  61. synth_ai/cli/commands/status/utils.py +114 -0
  62. synth_ai/cli/commands/train/__init__.py +53 -0
  63. synth_ai/cli/commands/train/core.py +22 -0
  64. synth_ai/cli/commands/train/errors.py +117 -0
  65. synth_ai/cli/commands/train/judge_schemas.py +201 -0
  66. synth_ai/cli/commands/train/judge_validation.py +305 -0
  67. synth_ai/cli/commands/train/prompt_learning_validation.py +633 -0
  68. synth_ai/cli/commands/train/validation.py +392 -0
  69. synth_ai/cli/demo_apps/__init__.py +10 -0
  70. synth_ai/cli/demo_apps/core/__init__.py +28 -0
  71. synth_ai/cli/demo_apps/core/cli.py +1735 -0
  72. synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
  73. synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
  74. synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
  75. synth_ai/cli/demo_apps/demo_registry.py +176 -0
  76. synth_ai/cli/demo_apps/demo_task_apps/core.py +440 -0
  77. synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
  78. synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
  79. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +742 -0
  80. synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
  81. synth_ai/cli/demo_apps/math/__init__.py +1 -0
  82. synth_ai/cli/demo_apps/math/_common.py +16 -0
  83. synth_ai/cli/demo_apps/math/app.py +38 -0
  84. synth_ai/cli/demo_apps/math/config.toml +76 -0
  85. synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
  86. synth_ai/cli/demo_apps/math/modal_task_app.py +702 -0
  87. synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
  88. synth_ai/cli/demo_apps/mipro/main.py +271 -0
  89. synth_ai/cli/demo_apps/mipro/task_app.py +933 -0
  90. synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
  91. synth_ai/cli/demos/__init__.py +12 -0
  92. synth_ai/cli/demos/demo.py +32 -0
  93. synth_ai/cli/demos/rl_demo.py +254 -0
  94. synth_ai/cli/deploy.py +216 -0
  95. synth_ai/cli/infra/__init__.py +14 -0
  96. synth_ai/cli/infra/balance.py +216 -0
  97. synth_ai/cli/infra/mcp.py +35 -0
  98. synth_ai/cli/infra/modal_app.py +36 -0
  99. synth_ai/cli/infra/setup.py +69 -0
  100. synth_ai/cli/infra/status.py +16 -0
  101. synth_ai/cli/infra/turso.py +77 -0
  102. synth_ai/cli/lib/__init__.py +10 -0
  103. synth_ai/cli/lib/agents.py +76 -0
  104. synth_ai/cli/lib/apps/modal_app.py +101 -0
  105. synth_ai/cli/lib/apps/task_app.py +643 -0
  106. synth_ai/cli/lib/bin.py +39 -0
  107. synth_ai/cli/lib/env.py +375 -0
  108. synth_ai/cli/lib/errors.py +85 -0
  109. synth_ai/cli/lib/modal.py +315 -0
  110. synth_ai/cli/lib/plotting.py +126 -0
  111. synth_ai/cli/lib/prompt_args.py +39 -0
  112. synth_ai/cli/lib/prompts.py +284 -0
  113. synth_ai/cli/lib/sqld.py +122 -0
  114. synth_ai/cli/lib/task_app_discovery.py +884 -0
  115. synth_ai/cli/lib/task_app_env.py +295 -0
  116. synth_ai/cli/lib/train_cfgs.py +300 -0
  117. synth_ai/cli/lib/tunnel_records.py +207 -0
  118. synth_ai/cli/local/__init__.py +14 -0
  119. synth_ai/cli/local/experiment_queue/__init__.py +72 -0
  120. synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
  121. synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
  122. synth_ai/cli/local/experiment_queue/config.py +128 -0
  123. synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
  124. synth_ai/cli/local/experiment_queue/database.py +175 -0
  125. synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
  126. synth_ai/cli/local/experiment_queue/models.py +231 -0
  127. synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
  128. synth_ai/cli/local/experiment_queue/results.py +373 -0
  129. synth_ai/cli/local/experiment_queue/schemas.py +131 -0
  130. synth_ai/cli/local/experiment_queue/service.py +344 -0
  131. synth_ai/cli/local/experiment_queue/status.py +372 -0
  132. synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
  133. synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
  134. synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
  135. synth_ai/cli/local/experiment_queue/validation.py +157 -0
  136. synth_ai/cli/local/session/__init__.py +92 -0
  137. synth_ai/cli/local/session/client.py +383 -0
  138. synth_ai/cli/local/session/constants.py +63 -0
  139. synth_ai/cli/local/session/exceptions.py +105 -0
  140. synth_ai/cli/local/session/manager.py +139 -0
  141. synth_ai/cli/local/session/models.py +89 -0
  142. synth_ai/cli/local/session/query.py +110 -0
  143. synth_ai/cli/root.py +30 -103
  144. synth_ai/cli/task_apps/__init__.py +26 -0
  145. synth_ai/cli/task_apps/commands.py +3153 -0
  146. synth_ai/cli/task_apps/deploy.py +7 -0
  147. synth_ai/cli/task_apps/list.py +26 -0
  148. synth_ai/cli/task_apps/main.py +36 -0
  149. synth_ai/cli/task_apps/modal_serve.py +11 -0
  150. synth_ai/cli/task_apps/serve.py +11 -0
  151. synth_ai/cli/training/__init__.py +8 -0
  152. synth_ai/cli/training/train.py +5 -0
  153. synth_ai/cli/training/train_cfg.py +34 -0
  154. synth_ai/cli/training/watch.py +506 -0
  155. synth_ai/cli/turso.py +34 -55
  156. synth_ai/cli/usage.py +159 -0
  157. synth_ai/cli/utils/__init__.py +8 -0
  158. synth_ai/cli/utils/experiments.py +235 -0
  159. synth_ai/cli/utils/queue.py +504 -0
  160. synth_ai/cli/utils/recent.py +133 -0
  161. synth_ai/cli/utils/traces.py +164 -0
  162. synth_ai/contracts/__init__.py +67 -0
  163. synth_ai/core/__init__.py +100 -0
  164. synth_ai/core/_utils/__init__.py +54 -0
  165. synth_ai/core/_utils/base_url.py +10 -0
  166. synth_ai/core/_utils/http.py +10 -0
  167. synth_ai/core/_utils/prompts.py +14 -0
  168. synth_ai/core/_utils/task_app_state.py +12 -0
  169. synth_ai/core/_utils/user_config.py +10 -0
  170. synth_ai/core/apps/common.py +116 -0
  171. synth_ai/core/auth.py +95 -0
  172. synth_ai/core/cfgs.py +240 -0
  173. synth_ai/core/config/__init__.py +16 -0
  174. synth_ai/core/config/base.py +168 -0
  175. synth_ai/core/config/resolver.py +89 -0
  176. synth_ai/core/env.py +220 -0
  177. synth_ai/core/errors.py +126 -0
  178. synth_ai/core/http.py +230 -0
  179. synth_ai/core/integrations/__init__.py +11 -0
  180. synth_ai/core/integrations/cloudflare.py +1710 -0
  181. synth_ai/core/integrations/mcp/__init__.py +6 -0
  182. synth_ai/core/integrations/mcp/__main__.py +8 -0
  183. synth_ai/core/integrations/mcp/claude.py +36 -0
  184. synth_ai/core/integrations/mcp/main.py +254 -0
  185. synth_ai/core/integrations/mcp/setup.py +100 -0
  186. synth_ai/core/integrations/modal.py +277 -0
  187. synth_ai/core/json.py +72 -0
  188. synth_ai/core/log_filter.py +99 -0
  189. synth_ai/core/logging.py +82 -0
  190. synth_ai/core/paths.py +107 -0
  191. synth_ai/core/pricing.py +109 -0
  192. synth_ai/core/process.py +233 -0
  193. synth_ai/core/ssl.py +25 -0
  194. synth_ai/core/storage/__init__.py +71 -0
  195. synth_ai/core/task_app_state.py +318 -0
  196. synth_ai/core/telemetry.py +282 -0
  197. synth_ai/core/tracing_v3/__init__.py +99 -0
  198. synth_ai/core/tracing_v3/config.py +229 -0
  199. synth_ai/core/tracing_v3/constants.py +21 -0
  200. synth_ai/core/tracing_v3/db_config.py +182 -0
  201. synth_ai/core/tracing_v3/decorators.py +401 -0
  202. synth_ai/core/tracing_v3/examples/basic_usage.py +194 -0
  203. synth_ai/core/tracing_v3/llm_call_record_helpers.py +437 -0
  204. synth_ai/core/tracing_v3/migration_helper.py +119 -0
  205. synth_ai/core/tracing_v3/replica_sync.py +262 -0
  206. synth_ai/core/tracing_v3/serialization.py +130 -0
  207. synth_ai/core/tracing_v3/session_tracer.py +542 -0
  208. synth_ai/core/tracing_v3/storage/base.py +211 -0
  209. synth_ai/core/tracing_v3/storage/config.py +109 -0
  210. synth_ai/core/tracing_v3/storage/factory.py +39 -0
  211. synth_ai/core/tracing_v3/storage/utils.py +206 -0
  212. synth_ai/core/tracing_v3/trace_utils.py +326 -0
  213. synth_ai/core/tracing_v3/turso/__init__.py +12 -0
  214. synth_ai/core/tracing_v3/turso/daemon.py +278 -0
  215. synth_ai/core/tracing_v3/turso/models.py +470 -0
  216. synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
  217. synth_ai/core/tracing_v3/utils.py +108 -0
  218. synth_ai/core/urls.py +18 -0
  219. synth_ai/core/user_config.py +137 -0
  220. synth_ai/core/uvicorn.py +222 -0
  221. synth_ai/data/__init__.py +110 -0
  222. synth_ai/data/enums.py +141 -0
  223. synth_ai/data/rewards.py +152 -0
  224. synth_ai/data/specs.py +36 -0
  225. synth_ai/data/traces.py +35 -0
  226. synth_ai/products/__init__.py +6 -0
  227. synth_ai/products/graph_evolve/__init__.py +46 -0
  228. synth_ai/products/graph_evolve/client.py +226 -0
  229. synth_ai/products/graph_evolve/config.py +591 -0
  230. synth_ai/products/graph_evolve/converters/__init__.py +42 -0
  231. synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
  232. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
  233. synth_ai/products/graph_evolve/run.py +222 -0
  234. synth_ai/sdk/__init__.py +119 -0
  235. synth_ai/sdk/api/__init__.py +1 -0
  236. synth_ai/sdk/api/models/supported.py +514 -0
  237. synth_ai/sdk/api/research_agent/__init__.py +86 -0
  238. synth_ai/sdk/api/research_agent/cli.py +428 -0
  239. synth_ai/sdk/api/research_agent/config.py +357 -0
  240. synth_ai/sdk/api/research_agent/job.py +717 -0
  241. synth_ai/sdk/api/train/__init__.py +85 -0
  242. synth_ai/sdk/api/train/builders.py +895 -0
  243. synth_ai/sdk/api/train/cli.py +2188 -0
  244. synth_ai/sdk/api/train/config_finder.py +267 -0
  245. synth_ai/sdk/api/train/configs/__init__.py +65 -0
  246. synth_ai/sdk/api/train/configs/prompt_learning.py +1706 -0
  247. synth_ai/sdk/api/train/configs/rl.py +188 -0
  248. synth_ai/sdk/api/train/configs/sft.py +99 -0
  249. synth_ai/sdk/api/train/configs/shared.py +81 -0
  250. synth_ai/sdk/api/train/context_learning.py +312 -0
  251. synth_ai/sdk/api/train/env_resolver.py +418 -0
  252. synth_ai/sdk/api/train/graph_validators.py +216 -0
  253. synth_ai/sdk/api/train/graphgen.py +984 -0
  254. synth_ai/sdk/api/train/graphgen_models.py +823 -0
  255. synth_ai/sdk/api/train/graphgen_validators.py +109 -0
  256. synth_ai/sdk/api/train/pollers.py +124 -0
  257. synth_ai/sdk/api/train/progress/__init__.py +97 -0
  258. synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
  259. synth_ai/sdk/api/train/progress/events.py +326 -0
  260. synth_ai/sdk/api/train/progress/results.py +428 -0
  261. synth_ai/sdk/api/train/progress/tracker.py +641 -0
  262. synth_ai/sdk/api/train/prompt_learning.py +470 -0
  263. synth_ai/sdk/api/train/rl.py +442 -0
  264. synth_ai/sdk/api/train/sft.py +396 -0
  265. synth_ai/sdk/api/train/summary.py +522 -0
  266. synth_ai/sdk/api/train/supported_algos.py +147 -0
  267. synth_ai/sdk/api/train/task_app.py +331 -0
  268. synth_ai/sdk/api/train/utils.py +279 -0
  269. synth_ai/sdk/api/train/validators.py +2424 -0
  270. synth_ai/sdk/baseline/__init__.py +25 -0
  271. synth_ai/sdk/baseline/config.py +209 -0
  272. synth_ai/sdk/baseline/discovery.py +216 -0
  273. synth_ai/sdk/baseline/execution.py +154 -0
  274. synth_ai/sdk/graphs/__init__.py +15 -0
  275. synth_ai/sdk/graphs/completions.py +570 -0
  276. synth_ai/sdk/inference/__init__.py +6 -0
  277. synth_ai/sdk/inference/client.py +128 -0
  278. synth_ai/sdk/jobs/__init__.py +16 -0
  279. synth_ai/sdk/jobs/client.py +371 -0
  280. synth_ai/sdk/judging/__init__.py +15 -0
  281. synth_ai/sdk/judging/base.py +24 -0
  282. synth_ai/sdk/judging/client.py +191 -0
  283. synth_ai/sdk/judging/schemas.py +222 -0
  284. synth_ai/sdk/judging/types.py +42 -0
  285. synth_ai/sdk/learning/__init__.py +69 -0
  286. synth_ai/sdk/learning/client.py +240 -0
  287. synth_ai/sdk/learning/ft_client.py +7 -0
  288. synth_ai/sdk/learning/health.py +49 -0
  289. synth_ai/sdk/learning/jobs.py +202 -0
  290. synth_ai/sdk/learning/prompt_extraction.py +334 -0
  291. synth_ai/sdk/learning/prompt_learning_client.py +455 -0
  292. synth_ai/sdk/learning/prompt_learning_types.py +185 -0
  293. synth_ai/sdk/learning/rl/client.py +268 -0
  294. synth_ai/sdk/learning/rl/contracts.py +27 -0
  295. synth_ai/sdk/learning/rl/env_keys.py +166 -0
  296. synth_ai/sdk/learning/rl/secrets.py +13 -0
  297. synth_ai/sdk/learning/sft/client.py +95 -0
  298. synth_ai/sdk/learning/sft/config.py +270 -0
  299. synth_ai/sdk/learning/sft/data.py +698 -0
  300. synth_ai/sdk/learning/validators.py +52 -0
  301. synth_ai/sdk/research_agent/__init__.py +34 -0
  302. synth_ai/sdk/research_agent/container_builder.py +328 -0
  303. synth_ai/sdk/research_agent/container_spec.py +198 -0
  304. synth_ai/sdk/research_agent/defaults.py +34 -0
  305. synth_ai/sdk/research_agent/results_collector.py +69 -0
  306. synth_ai/sdk/specs/__init__.py +46 -0
  307. synth_ai/sdk/specs/dataclasses.py +149 -0
  308. synth_ai/sdk/specs/loader.py +144 -0
  309. synth_ai/sdk/specs/serializer.py +199 -0
  310. synth_ai/sdk/specs/validation.py +250 -0
  311. synth_ai/sdk/streaming/__init__.py +35 -0
  312. synth_ai/sdk/streaming/config.py +94 -0
  313. synth_ai/sdk/streaming/handlers.py +1997 -0
  314. synth_ai/sdk/streaming/streamer.py +704 -0
  315. synth_ai/sdk/streaming/types.py +112 -0
  316. synth_ai/sdk/task/__init__.py +151 -0
  317. synth_ai/sdk/task/apps/__init__.py +133 -0
  318. synth_ai/sdk/task/config.py +261 -0
  319. synth_ai/sdk/task/contracts.py +298 -0
  320. synth_ai/sdk/task/datasets.py +108 -0
  321. synth_ai/sdk/task/in_process.py +1190 -0
  322. synth_ai/sdk/task/in_process_runner.py +309 -0
  323. synth_ai/sdk/task/inference_api.py +299 -0
  324. synth_ai/sdk/task/proxy.py +287 -0
  325. synth_ai/sdk/task/rubrics/__init__.py +55 -0
  326. synth_ai/sdk/task/rubrics/loaders.py +156 -0
  327. synth_ai/sdk/task/rubrics/models.py +57 -0
  328. synth_ai/sdk/task/rubrics/scoring.py +116 -0
  329. synth_ai/sdk/task/rubrics/strict.py +149 -0
  330. synth_ai/sdk/task/server.py +580 -0
  331. synth_ai/sdk/task/trace_correlation_helpers.py +506 -0
  332. synth_ai/sdk/task/tracing_utils.py +95 -0
  333. synth_ai/sdk/task/validators.py +456 -0
  334. synth_ai/sdk/tracing/__init__.py +39 -0
  335. synth_ai/sdk/training/__init__.py +102 -0
  336. synth_ai/sdk/usage/__init__.py +37 -0
  337. synth_ai/sdk/usage/client.py +171 -0
  338. synth_ai/sdk/usage/models.py +261 -0
  339. synth_ai/utils/__init__.py +213 -0
  340. synth_ai-0.4.1.dist-info/METADATA +195 -0
  341. synth_ai-0.4.1.dist-info/RECORD +379 -0
  342. synth_ai-0.4.1.dist-info/entry_points.txt +2 -0
  343. synth_ai-0.4.1.dist-info/top_level.txt +1 -0
  344. examples/__init__.py +0 -16
  345. examples/analyze_semantic_words.sh +0 -17
  346. examples/crafter_debug_render.py +0 -186
  347. examples/qwen_coder/README.md +0 -102
  348. examples/qwen_coder/_shared.py +0 -113
  349. examples/qwen_coder/configs/coder_lora_30b.toml +0 -61
  350. examples/qwen_coder/configs/coder_lora_4b.toml +0 -57
  351. examples/qwen_coder/configs/coder_lora_small.toml +0 -58
  352. examples/qwen_coder/generate_dataset.py +0 -98
  353. examples/qwen_coder/infer_ft_smoke.py +0 -64
  354. examples/qwen_coder/infer_prod_proxy.py +0 -73
  355. examples/qwen_coder/infer_via_synth.py +0 -87
  356. examples/qwen_coder/scripts/infer_coder.sh +0 -18
  357. examples/qwen_coder/scripts/train_coder_30b.sh +0 -21
  358. examples/qwen_coder/sft_full_17b.py +0 -103
  359. examples/qwen_coder/sft_lora_30b.py +0 -110
  360. examples/qwen_coder/subset_jsonl.py +0 -38
  361. examples/qwen_coder/validate_jsonl.py +0 -59
  362. examples/rl/README.md +0 -169
  363. examples/rl/configs/eval_base_qwen.toml +0 -15
  364. examples/rl/configs/eval_rl_qwen.toml +0 -11
  365. examples/rl/configs/rl_from_base_qwen.toml +0 -35
  366. examples/rl/configs/rl_from_base_qwen17.toml +0 -74
  367. examples/rl/configs/rl_from_ft_qwen.toml +0 -35
  368. examples/rl/download_dataset.py +0 -80
  369. examples/rl/run_eval.py +0 -436
  370. examples/rl/run_rl_and_save.py +0 -111
  371. examples/rl/task_app/README.md +0 -22
  372. examples/rl/task_app/math_single_step.py +0 -991
  373. examples/rl/task_app/math_task_app.py +0 -115
  374. examples/run_crafter_demo.sh +0 -10
  375. examples/sft/README.md +0 -139
  376. examples/sft/configs/crafter_fft_qwen0p6b.toml +0 -44
  377. examples/sft/configs/crafter_lora_qwen0p6b.toml +0 -45
  378. examples/sft/evaluate.py +0 -117
  379. examples/sft/export_dataset.py +0 -117
  380. examples/sft/generate_traces.py +0 -162
  381. examples/swe/__init__.py +0 -12
  382. examples/swe/task_app/README.md +0 -105
  383. examples/swe/task_app/__init__.py +0 -2
  384. examples/swe/task_app/grpo_swe_mini.py +0 -571
  385. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -136
  386. examples/swe/task_app/hosted/README.md +0 -173
  387. examples/swe/task_app/hosted/__init__.py +0 -5
  388. examples/swe/task_app/hosted/branching.py +0 -143
  389. examples/swe/task_app/hosted/environment_routes.py +0 -1289
  390. examples/swe/task_app/hosted/envs/__init__.py +0 -1
  391. examples/swe/task_app/hosted/envs/crafter/__init__.py +0 -6
  392. examples/swe/task_app/hosted/envs/crafter/app.py +0 -1
  393. examples/swe/task_app/hosted/envs/crafter/environment.py +0 -522
  394. examples/swe/task_app/hosted/envs/crafter/policy.py +0 -478
  395. examples/swe/task_app/hosted/envs/crafter/react_agent.py +0 -108
  396. examples/swe/task_app/hosted/envs/crafter/shared.py +0 -305
  397. examples/swe/task_app/hosted/envs/crafter/tools.py +0 -47
  398. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +0 -8
  399. examples/swe/task_app/hosted/envs/mini_swe/environment.py +0 -1164
  400. examples/swe/task_app/hosted/envs/mini_swe/policy.py +0 -355
  401. examples/swe/task_app/hosted/envs/mini_swe/shared.py +0 -83
  402. examples/swe/task_app/hosted/envs/mini_swe/tools.py +0 -96
  403. examples/swe/task_app/hosted/hosted_app.py +0 -204
  404. examples/swe/task_app/hosted/inference/__init__.py +0 -5
  405. examples/swe/task_app/hosted/inference/openai_client.py +0 -618
  406. examples/swe/task_app/hosted/main.py +0 -100
  407. examples/swe/task_app/hosted/policy_routes.py +0 -1079
  408. examples/swe/task_app/hosted/registry.py +0 -195
  409. examples/swe/task_app/hosted/rollout.py +0 -1869
  410. examples/swe/task_app/hosted/storage/__init__.py +0 -5
  411. examples/swe/task_app/hosted/storage/volume.py +0 -211
  412. examples/swe/task_app/hosted/test_agents.py +0 -161
  413. examples/swe/task_app/hosted/test_service.py +0 -137
  414. examples/swe/task_app/hosted/utils.py +0 -62
  415. examples/vlm/README.md +0 -68
  416. examples/vlm/configs/crafter_vlm_gpt4o.toml +0 -44
  417. examples/vlm/crafter_image_only_agent.py +0 -207
  418. examples/vlm/crafter_openai_vlm_agent.py +0 -277
  419. examples/vlm/filter_image_rows.py +0 -63
  420. examples/vlm/run_crafter_vlm_benchmark.py +0 -316
  421. examples/warming_up_to_rl/analyze_trace_db.py +0 -422
  422. examples/warming_up_to_rl/configs/crafter_fft.toml +0 -48
  423. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -54
  424. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +0 -20
  425. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +0 -13
  426. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +0 -23
  427. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +0 -83
  428. examples/warming_up_to_rl/configs/rl_from_ft.toml +0 -56
  429. examples/warming_up_to_rl/export_trace_sft.py +0 -723
  430. examples/warming_up_to_rl/groq_test.py +0 -95
  431. examples/warming_up_to_rl/manage_secrets.py +0 -131
  432. examples/warming_up_to_rl/readme.md +0 -179
  433. examples/warming_up_to_rl/run_eval.py +0 -510
  434. examples/warming_up_to_rl/run_fft_and_save.py +0 -380
  435. examples/warming_up_to_rl/run_local_rollout.py +0 -237
  436. examples/warming_up_to_rl/run_local_rollout_modal.py +0 -246
  437. examples/warming_up_to_rl/run_local_rollout_parallel.py +0 -403
  438. examples/warming_up_to_rl/run_local_rollout_traced.py +0 -475
  439. examples/warming_up_to_rl/run_rl_and_save.py +0 -124
  440. examples/warming_up_to_rl/run_rollout_remote.py +0 -154
  441. examples/warming_up_to_rl/task_app/README.md +0 -42
  442. examples/warming_up_to_rl/task_app/grpo_crafter.py +0 -700
  443. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +0 -146
  444. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +0 -173
  445. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +0 -5
  446. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +0 -143
  447. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +0 -1226
  448. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +0 -1
  449. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -6
  450. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +0 -1
  451. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +0 -522
  452. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +0 -478
  453. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +0 -108
  454. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -305
  455. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -47
  456. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +0 -204
  457. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +0 -5
  458. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +0 -618
  459. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +0 -100
  460. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +0 -1083
  461. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +0 -195
  462. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +0 -1869
  463. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +0 -5
  464. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +0 -211
  465. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +0 -161
  466. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +0 -137
  467. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +0 -62
  468. synth/__init__.py +0 -14
  469. synth_ai/api/models/supported.py +0 -376
  470. synth_ai/api/train/__init__.py +0 -5
  471. synth_ai/api/train/builders.py +0 -296
  472. synth_ai/api/train/cli.py +0 -606
  473. synth_ai/api/train/config_finder.py +0 -228
  474. synth_ai/api/train/env_resolver.py +0 -347
  475. synth_ai/api/train/pollers.py +0 -75
  476. synth_ai/api/train/supported_algos.py +0 -139
  477. synth_ai/api/train/task_app.py +0 -195
  478. synth_ai/api/train/utils.py +0 -217
  479. synth_ai/cli/_modal_wrapper.py +0 -28
  480. synth_ai/cli/_typer_patch.py +0 -49
  481. synth_ai/cli/balance.py +0 -203
  482. synth_ai/cli/calc.py +0 -69
  483. synth_ai/cli/demo.py +0 -159
  484. synth_ai/cli/legacy_root_backup.py +0 -470
  485. synth_ai/cli/man.py +0 -106
  486. synth_ai/cli/recent.py +0 -127
  487. synth_ai/cli/rl_demo.py +0 -274
  488. synth_ai/cli/status.py +0 -133
  489. synth_ai/cli/task_apps.py +0 -2782
  490. synth_ai/cli/traces.py +0 -163
  491. synth_ai/cli/watch.py +0 -505
  492. synth_ai/config/base_url.py +0 -107
  493. synth_ai/core/experiment.py +0 -13
  494. synth_ai/core/system.py +0 -15
  495. synth_ai/demo_registry.py +0 -295
  496. synth_ai/demos/core/__init__.py +0 -1
  497. synth_ai/demos/core/cli.py +0 -1756
  498. synth_ai/demos/demo_task_apps/core.py +0 -440
  499. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +0 -172
  500. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
  501. synth_ai/demos/demo_task_apps/math/modal_task_app.py +0 -739
  502. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -37
  503. synth_ai/environments/__init__.py +0 -31
  504. synth_ai/environments/environment/__init__.py +0 -1
  505. synth_ai/environments/environment/artifacts/__init__.py +0 -1
  506. synth_ai/environments/environment/artifacts/base.py +0 -52
  507. synth_ai/environments/environment/core.py +0 -67
  508. synth_ai/environments/environment/db/__init__.py +0 -1
  509. synth_ai/environments/environment/db/sqlite.py +0 -45
  510. synth_ai/environments/environment/registry.py +0 -233
  511. synth_ai/environments/environment/resources/sqlite.py +0 -45
  512. synth_ai/environments/environment/results.py +0 -1
  513. synth_ai/environments/environment/rewards/__init__.py +0 -1
  514. synth_ai/environments/environment/rewards/core.py +0 -29
  515. synth_ai/environments/environment/shared_engine.py +0 -26
  516. synth_ai/environments/environment/tools/__init__.py +0 -200
  517. synth_ai/environments/examples/__init__.py +0 -1
  518. synth_ai/environments/examples/bandit/__init__.py +0 -33
  519. synth_ai/environments/examples/bandit/engine.py +0 -302
  520. synth_ai/environments/examples/bandit/environment.py +0 -194
  521. synth_ai/environments/examples/bandit/taskset.py +0 -200
  522. synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
  523. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
  524. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
  525. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
  526. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
  527. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
  528. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
  529. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
  530. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
  531. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
  532. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
  533. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
  534. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
  535. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
  536. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
  537. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
  538. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
  539. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
  540. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
  541. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
  542. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
  543. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
  544. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
  545. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
  546. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
  547. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
  548. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
  549. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
  550. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
  551. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
  552. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
  553. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
  554. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
  555. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
  556. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
  557. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
  558. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
  559. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
  560. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
  561. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
  562. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
  563. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
  564. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
  565. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
  566. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
  567. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
  568. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
  569. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
  570. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
  571. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
  572. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
  573. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
  574. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
  575. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
  576. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
  577. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
  578. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
  579. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
  580. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
  581. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
  582. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
  583. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
  584. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
  585. synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
  586. synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
  587. synth_ai/environments/examples/crafter_classic/engine.py +0 -579
  588. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
  589. synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
  590. synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
  591. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
  592. synth_ai/environments/examples/crafter_classic/environment.py +0 -479
  593. synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
  594. synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
  595. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
  596. synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
  597. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
  598. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
  599. synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
  600. synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
  601. synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
  602. synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
  603. synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
  604. synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
  605. synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
  606. synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
  607. synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
  608. synth_ai/environments/examples/crafter_custom/environment.py +0 -312
  609. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
  610. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
  611. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
  612. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
  613. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
  614. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
  615. synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
  616. synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
  617. synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
  618. synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
  619. synth_ai/environments/examples/enron/engine.py +0 -295
  620. synth_ai/environments/examples/enron/environment.py +0 -166
  621. synth_ai/environments/examples/enron/taskset.py +0 -112
  622. synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
  623. synth_ai/environments/examples/minigrid/__init__.py +0 -48
  624. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
  625. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
  626. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
  627. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
  628. synth_ai/environments/examples/minigrid/engine.py +0 -589
  629. synth_ai/environments/examples/minigrid/environment.py +0 -274
  630. synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
  631. synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
  632. synth_ai/environments/examples/minigrid/taskset.py +0 -583
  633. synth_ai/environments/examples/nethack/__init__.py +0 -7
  634. synth_ai/environments/examples/nethack/achievements.py +0 -337
  635. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
  636. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
  637. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
  638. synth_ai/environments/examples/nethack/engine.py +0 -739
  639. synth_ai/environments/examples/nethack/environment.py +0 -256
  640. synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
  641. synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
  642. synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
  643. synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
  644. synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
  645. synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
  646. synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
  647. synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
  648. synth_ai/environments/examples/nethack/taskset.py +0 -323
  649. synth_ai/environments/examples/red/__init__.py +0 -7
  650. synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
  651. synth_ai/environments/examples/red/config_logging.py +0 -110
  652. synth_ai/environments/examples/red/engine.py +0 -694
  653. synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
  654. synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -28
  655. synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
  656. synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
  657. synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
  658. synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
  659. synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
  660. synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
  661. synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
  662. synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
  663. synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
  664. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
  665. synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
  666. synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
  667. synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
  668. synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
  669. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -140
  670. synth_ai/environments/examples/red/environment.py +0 -238
  671. synth_ai/environments/examples/red/taskset.py +0 -79
  672. synth_ai/environments/examples/red/units/__init__.py +0 -1
  673. synth_ai/environments/examples/sokoban/__init__.py +0 -1
  674. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
  675. synth_ai/environments/examples/sokoban/engine.py +0 -678
  676. synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
  677. synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
  678. synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
  679. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
  680. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
  681. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
  682. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
  683. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
  684. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
  685. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
  686. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
  687. synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
  688. synth_ai/environments/examples/sokoban/environment.py +0 -229
  689. synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
  690. synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
  691. synth_ai/environments/examples/sokoban/taskset.py +0 -428
  692. synth_ai/environments/examples/tictactoe/__init__.py +0 -1
  693. synth_ai/environments/examples/tictactoe/engine.py +0 -368
  694. synth_ai/environments/examples/tictactoe/environment.py +0 -240
  695. synth_ai/environments/examples/tictactoe/taskset.py +0 -215
  696. synth_ai/environments/examples/verilog/__init__.py +0 -10
  697. synth_ai/environments/examples/verilog/engine.py +0 -329
  698. synth_ai/environments/examples/verilog/environment.py +0 -350
  699. synth_ai/environments/examples/verilog/taskset.py +0 -420
  700. synth_ai/environments/examples/wordle/__init__.py +0 -29
  701. synth_ai/environments/examples/wordle/engine.py +0 -398
  702. synth_ai/environments/examples/wordle/environment.py +0 -159
  703. synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
  704. synth_ai/environments/examples/wordle/taskset.py +0 -230
  705. synth_ai/environments/reproducibility/core.py +0 -42
  706. synth_ai/environments/reproducibility/helpers.py +0 -0
  707. synth_ai/environments/reproducibility/tree.py +0 -363
  708. synth_ai/environments/service/app.py +0 -97
  709. synth_ai/environments/service/core_routes.py +0 -1021
  710. synth_ai/environments/service/external_registry.py +0 -56
  711. synth_ai/environments/service/registry.py +0 -9
  712. synth_ai/environments/stateful/__init__.py +0 -1
  713. synth_ai/environments/stateful/core.py +0 -163
  714. synth_ai/environments/stateful/engine.py +0 -21
  715. synth_ai/environments/stateful/state.py +0 -7
  716. synth_ai/environments/tasks/api.py +0 -19
  717. synth_ai/environments/tasks/core.py +0 -81
  718. synth_ai/environments/tasks/filters.py +0 -40
  719. synth_ai/environments/tasks/utils.py +0 -90
  720. synth_ai/environments/v0_observability/history.py +0 -3
  721. synth_ai/environments/v0_observability/log.py +0 -2
  722. synth_ai/evals/base.py +0 -13
  723. synth_ai/handshake.py +0 -109
  724. synth_ai/http.py +0 -26
  725. synth_ai/http_client.py +0 -136
  726. synth_ai/inference/__init__.py +0 -5
  727. synth_ai/inference/client.py +0 -34
  728. synth_ai/jobs/client.py +0 -271
  729. synth_ai/learning/__init__.py +0 -59
  730. synth_ai/learning/client.py +0 -241
  731. synth_ai/learning/ft_client.py +0 -7
  732. synth_ai/learning/health.py +0 -49
  733. synth_ai/learning/jobs.py +0 -201
  734. synth_ai/learning/rl/client.py +0 -267
  735. synth_ai/learning/rl/contracts.py +0 -27
  736. synth_ai/learning/rl/env_keys.py +0 -166
  737. synth_ai/learning/rl/secrets.py +0 -13
  738. synth_ai/learning/sft/client.py +0 -68
  739. synth_ai/learning/sft/config.py +0 -270
  740. synth_ai/learning/sft/data.py +0 -295
  741. synth_ai/learning/validators.py +0 -49
  742. synth_ai/lm/__init__.py +0 -25
  743. synth_ai/main.py +0 -6
  744. synth_ai/task/__init__.py +0 -102
  745. synth_ai/task/apps/__init__.py +0 -128
  746. synth_ai/task/contracts.py +0 -137
  747. synth_ai/task/datasets.py +0 -108
  748. synth_ai/task/proxy.py +0 -259
  749. synth_ai/task/server.py +0 -424
  750. synth_ai/task/tracing_utils.py +0 -84
  751. synth_ai/task/validators.py +0 -11
  752. synth_ai/tracing_v3/__init__.py +0 -97
  753. synth_ai/tracing_v3/config.py +0 -84
  754. synth_ai/tracing_v3/db_config.py +0 -194
  755. synth_ai/tracing_v3/decorators.py +0 -369
  756. synth_ai/tracing_v3/examples/basic_usage.py +0 -189
  757. synth_ai/tracing_v3/llm_call_record_helpers.py +0 -337
  758. synth_ai/tracing_v3/migration_helper.py +0 -120
  759. synth_ai/tracing_v3/replica_sync.py +0 -258
  760. synth_ai/tracing_v3/session_tracer.py +0 -530
  761. synth_ai/tracing_v3/storage/base.py +0 -210
  762. synth_ai/tracing_v3/storage/config.py +0 -75
  763. synth_ai/tracing_v3/storage/factory.py +0 -39
  764. synth_ai/tracing_v3/storage/utils.py +0 -204
  765. synth_ai/tracing_v3/turso/daemon.py +0 -149
  766. synth_ai/tracing_v3/turso/models.py +0 -469
  767. synth_ai/tracing_v3/turso/native_manager.py +0 -1173
  768. synth_ai/tracing_v3/utils.py +0 -108
  769. synth_ai/v0/api/__init__.py +0 -8
  770. synth_ai/v0/api/models/__init__.py +0 -8
  771. synth_ai/v0/api/models/supported.py +0 -8
  772. synth_ai/v0/config/__init__.py +0 -15
  773. synth_ai/v0/config/base_url.py +0 -12
  774. synth_ai/v0/lm/__init__.py +0 -51
  775. synth_ai/v0/lm/caching/constants.py +0 -6
  776. synth_ai/v0/lm/caching/dbs.py +0 -0
  777. synth_ai/v0/lm/caching/ephemeral.py +0 -100
  778. synth_ai/v0/lm/caching/handler.py +0 -137
  779. synth_ai/v0/lm/caching/initialize.py +0 -11
  780. synth_ai/v0/lm/caching/persistent.py +0 -114
  781. synth_ai/v0/lm/config.py +0 -115
  782. synth_ai/v0/lm/constants.py +0 -32
  783. synth_ai/v0/lm/core/__init__.py +0 -8
  784. synth_ai/v0/lm/core/all.py +0 -73
  785. synth_ai/v0/lm/core/exceptions.py +0 -5
  786. synth_ai/v0/lm/core/main.py +0 -331
  787. synth_ai/v0/lm/core/main_v3.py +0 -594
  788. synth_ai/v0/lm/core/synth_models.py +0 -35
  789. synth_ai/v0/lm/core/vendor_clients.py +0 -190
  790. synth_ai/v0/lm/cost/__init__.py +0 -0
  791. synth_ai/v0/lm/cost/monitor.py +0 -1
  792. synth_ai/v0/lm/cost/statefulness.py +0 -1
  793. synth_ai/v0/lm/injection.py +0 -80
  794. synth_ai/v0/lm/overrides.py +0 -206
  795. synth_ai/v0/lm/provider_support/__init__.py +0 -8
  796. synth_ai/v0/lm/provider_support/anthropic.py +0 -972
  797. synth_ai/v0/lm/provider_support/openai.py +0 -1139
  798. synth_ai/v0/lm/provider_support/suppress_logging.py +0 -31
  799. synth_ai/v0/lm/structured_outputs/__init__.py +0 -0
  800. synth_ai/v0/lm/structured_outputs/handler.py +0 -440
  801. synth_ai/v0/lm/structured_outputs/inject.py +0 -297
  802. synth_ai/v0/lm/structured_outputs/rehabilitate.py +0 -185
  803. synth_ai/v0/lm/tools/__init__.py +0 -3
  804. synth_ai/v0/lm/tools/base.py +0 -172
  805. synth_ai/v0/lm/unified_interface.py +0 -202
  806. synth_ai/v0/lm/vendors/__init__.py +0 -0
  807. synth_ai/v0/lm/vendors/base.py +0 -81
  808. synth_ai/v0/lm/vendors/core/__init__.py +0 -0
  809. synth_ai/v0/lm/vendors/core/anthropic_api.py +0 -387
  810. synth_ai/v0/lm/vendors/core/gemini_api.py +0 -292
  811. synth_ai/v0/lm/vendors/core/mistral_api.py +0 -322
  812. synth_ai/v0/lm/vendors/core/openai_api.py +0 -227
  813. synth_ai/v0/lm/vendors/core/synth_dev_api.py +0 -0
  814. synth_ai/v0/lm/vendors/local/__init__.py +0 -0
  815. synth_ai/v0/lm/vendors/local/ollama.py +0 -0
  816. synth_ai/v0/lm/vendors/openai_standard.py +0 -782
  817. synth_ai/v0/lm/vendors/openai_standard_responses.py +0 -259
  818. synth_ai/v0/lm/vendors/retries.py +0 -22
  819. synth_ai/v0/lm/vendors/supported/__init__.py +0 -0
  820. synth_ai/v0/lm/vendors/supported/custom_endpoint.py +0 -415
  821. synth_ai/v0/lm/vendors/supported/deepseek.py +0 -69
  822. synth_ai/v0/lm/vendors/supported/grok.py +0 -75
  823. synth_ai/v0/lm/vendors/supported/groq.py +0 -16
  824. synth_ai/v0/lm/vendors/supported/ollama.py +0 -15
  825. synth_ai/v0/lm/vendors/supported/openrouter.py +0 -74
  826. synth_ai/v0/lm/vendors/supported/together.py +0 -11
  827. synth_ai/v0/lm/vendors/synth_client.py +0 -835
  828. synth_ai/v0/lm/warmup.py +0 -186
  829. synth_ai/v0/tracing/__init__.py +0 -0
  830. synth_ai/v0/tracing/abstractions.py +0 -224
  831. synth_ai/v0/tracing/base_client.py +0 -91
  832. synth_ai/v0/tracing/client_manager.py +0 -131
  833. synth_ai/v0/tracing/config.py +0 -142
  834. synth_ai/v0/tracing/context.py +0 -146
  835. synth_ai/v0/tracing/decorators.py +0 -682
  836. synth_ai/v0/tracing/events/__init__.py +0 -0
  837. synth_ai/v0/tracing/events/manage.py +0 -147
  838. synth_ai/v0/tracing/events/scope.py +0 -86
  839. synth_ai/v0/tracing/events/store.py +0 -228
  840. synth_ai/v0/tracing/immediate_client.py +0 -151
  841. synth_ai/v0/tracing/local.py +0 -18
  842. synth_ai/v0/tracing/log_client_base.py +0 -73
  843. synth_ai/v0/tracing/retry_queue.py +0 -186
  844. synth_ai/v0/tracing/trackers.py +0 -515
  845. synth_ai/v0/tracing/upload.py +0 -409
  846. synth_ai/v0/tracing/utils.py +0 -9
  847. synth_ai/v0/tracing_v1/__init__.py +0 -16
  848. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  849. synth_ai/v0/tracing_v1/base_client.py +0 -91
  850. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  851. synth_ai/v0/tracing_v1/config.py +0 -142
  852. synth_ai/v0/tracing_v1/context.py +0 -146
  853. synth_ai/v0/tracing_v1/decorators.py +0 -703
  854. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  855. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  856. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  857. synth_ai/v0/tracing_v1/events/store.py +0 -228
  858. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  859. synth_ai/v0/tracing_v1/local.py +0 -18
  860. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  861. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  862. synth_ai/v0/tracing_v1/trackers.py +0 -515
  863. synth_ai/v0/tracing_v1/upload.py +0 -527
  864. synth_ai/v0/tracing_v1/utils.py +0 -9
  865. synth_ai/v0/tracing_v3/__init__.py +0 -10
  866. synth_ai/v0/tracing_v3/abstractions.py +0 -3
  867. synth_ai/v0/tracing_v3/decorators.py +0 -3
  868. synth_ai/v0/tracing_v3/llm_call_record_helpers.py +0 -3
  869. synth_ai/v0/tracing_v3/session_tracer.py +0 -3
  870. synth_ai-0.2.9.dev11.dist-info/METADATA +0 -191
  871. synth_ai-0.2.9.dev11.dist-info/RECORD +0 -571
  872. synth_ai-0.2.9.dev11.dist-info/entry_points.txt +0 -3
  873. synth_ai-0.2.9.dev11.dist-info/top_level.txt +0 -3
  874. /synth_ai/{demos/demo_task_apps → cli/demo_apps}/crafter/__init__.py +0 -0
  875. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/__init__.py +0 -0
  876. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/crafter_fft_4b.toml +0 -0
  877. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +0 -0
  878. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
  879. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/_common.py +0 -0
  880. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +0 -0
  881. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/config.toml +0 -0
  882. /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +0 -0
  883. /synth_ai/{v0/lm/caching → core/apps}/__init__.py +0 -0
  884. /synth_ai/{tracing_v3 → core/tracing_v3}/abstractions.py +0 -0
  885. /synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +0 -0
  886. /synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
  887. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
  888. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
  889. /synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
  890. /synth_ai/{compound/cais.py → py.typed} +0 -0
  891. /synth_ai/{learning → sdk/learning}/algorithms.py +0 -0
  892. /synth_ai/{learning → sdk/learning}/config.py +0 -0
  893. /synth_ai/{learning → sdk/learning}/constants.py +0 -0
  894. /synth_ai/{learning → sdk/learning}/core.py +0 -0
  895. /synth_ai/{learning → sdk/learning}/gateway.py +0 -0
  896. /synth_ai/{learning → sdk/learning}/rl/__init__.py +0 -0
  897. /synth_ai/{learning → sdk/learning}/rl/config.py +0 -0
  898. /synth_ai/{learning → sdk/learning}/rl_client.py +0 -0
  899. /synth_ai/{learning → sdk/learning}/sft/__init__.py +0 -0
  900. /synth_ai/{learning → sdk/learning}/sse.py +0 -0
  901. /synth_ai/{task → sdk/task}/auth.py +0 -0
  902. /synth_ai/{task → sdk/task}/client.py +0 -0
  903. /synth_ai/{task → sdk/task}/errors.py +0 -0
  904. /synth_ai/{task → sdk/task}/health.py +0 -0
  905. /synth_ai/{task → sdk/task}/json.py +0 -0
  906. /synth_ai/{task → sdk/task}/rubrics.py +0 -0
  907. /synth_ai/{task → sdk/task}/vendors.py +0 -0
  908. {synth_ai-0.2.9.dev11.dist-info → synth_ai-0.4.1.dist-info}/WHEEL +0 -0
  909. {synth_ai-0.2.9.dev11.dist-info → synth_ai-0.4.1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,2424 @@
1
+ """SDK-side validation for training configs - catch errors BEFORE sending to backend."""
2
+
3
+ import re
4
+ import warnings
5
+ from pathlib import Path
6
+ from typing import Any, List, Tuple
7
+
8
+ import click
9
+ import toml
10
+
11
+ # Import unknown field validation from CLI module
12
+ from synth_ai.cli.commands.train.prompt_learning_validation import (
13
+ validate_prompt_learning_config as _validate_unknown_fields,
14
+ )
15
+ from synth_ai.core.telemetry import log_info
16
+
17
+
18
+ class ConfigValidationError(Exception):
19
+ """Raised when a training config is invalid."""
20
+ pass
21
+
22
+
23
+ # Supported models for prompt learning (GEPA & MIPRO)
24
+ # NOTE: gpt-5-pro is explicitly EXCLUDED - too expensive for prompt learning
25
+ OPENAI_SUPPORTED_MODELS = {
26
+ "gpt-4o",
27
+ "gpt-4o-mini",
28
+ "gpt-4.1",
29
+ "gpt-4.1-mini",
30
+ "gpt-4.1-nano",
31
+ "gpt-5",
32
+ "gpt-5-mini",
33
+ "gpt-5-nano",
34
+ # Explicitly EXCLUDED: "gpt-5-pro" - too expensive
35
+ }
36
+
37
+ # Groq supported models - patterns and exact matches
38
+ # Models can be in format "model-name" or "provider/model-name" (e.g., "openai/gpt-oss-20b")
39
+ GROQ_SUPPORTED_PATTERNS = [
40
+ re.compile(r"^(openai/)?gpt-oss-\d+b"), # e.g., gpt-oss-20b, openai/gpt-oss-120b
41
+ re.compile(r"^(llama-3\.3-70b|groq/llama-3\.3-70b)"), # e.g., llama-3.3-70b-versatile
42
+ re.compile(r"^(qwen.*32b|groq/qwen.*32b)"), # e.g., qwen-32b, qwen3-32b, groq/qwen3-32b
43
+ ]
44
+
45
+ GROQ_EXACT_MATCHES = {
46
+ "llama-3.3-70b",
47
+ "llama-3.1-8b-instant",
48
+ "qwen-32b",
49
+ "qwen3-32b",
50
+ }
51
+
52
+ # Google/Gemini supported models
53
+ GOOGLE_SUPPORTED_MODELS = {
54
+ "gemini-2.5-pro",
55
+ "gemini-2.5-pro-gt200k",
56
+ "gemini-2.5-flash",
57
+ "gemini-2.5-flash-lite",
58
+ }
59
+
60
+
61
+ def _is_supported_openai_model(model: str) -> bool:
62
+ """Check if model is a supported OpenAI model."""
63
+ model_lower = model.lower().strip()
64
+ # Strip provider prefix if present (e.g., "openai/gpt-4o" -> "gpt-4o")
65
+ if "/" in model_lower:
66
+ model_lower = model_lower.split("/", 1)[1]
67
+ return model_lower in {m.lower() for m in OPENAI_SUPPORTED_MODELS}
68
+
69
+
70
+ def _is_supported_groq_model(model: str) -> bool:
71
+ """Check if model is a supported Groq model."""
72
+ model_lower = model.lower().strip()
73
+
74
+ # Remove provider prefix if present (e.g., "openai/gpt-oss-20b" -> "gpt-oss-20b")
75
+ if "/" in model_lower:
76
+ model_lower = model_lower.split("/", 1)[1]
77
+
78
+ # Check exact matches first
79
+ if model_lower in {m.lower() for m in GROQ_EXACT_MATCHES}:
80
+ return True
81
+
82
+ # Check patterns (patterns already handle provider prefix)
83
+ return any(pattern.match(model.lower().strip()) for pattern in GROQ_SUPPORTED_PATTERNS)
84
+
85
+
86
+ def _is_supported_google_model(model: str) -> bool:
87
+ """Check if model is a supported Google/Gemini model."""
88
+ model_lower = model.lower().strip()
89
+ # Strip provider prefix if present (e.g., "google/gemini-2.5-flash-lite" -> "gemini-2.5-flash-lite")
90
+ if "/" in model_lower:
91
+ model_lower = model_lower.split("/", 1)[1]
92
+ return model_lower in {m.lower() for m in GOOGLE_SUPPORTED_MODELS}
93
+
94
+
95
+ def _validate_adaptive_pool_config(
96
+ adaptive_pool_section: dict[str, Any],
97
+ prefix: str, # e.g., "gepa.adaptive_pool" or "mipro.adaptive_pool"
98
+ errors: list[str],
99
+ ) -> None:
100
+ """Validate adaptive_pool configuration section.
101
+
102
+ Validates all fields in adaptive_pool config including:
103
+ - Level presets (NONE, LOW, MODERATE, HIGH)
104
+ - Numeric fields with min/max constraints
105
+ - Relationship constraints (pool_init_size >= pool_min_size >= anchor_size)
106
+ - String enum fields (anchor_selection_method, exploration_strategy, etc.)
107
+ - Heat-up phase configuration
108
+
109
+ Args:
110
+ adaptive_pool_section: Dict containing adaptive_pool config with fields:
111
+ - level: Preset level (NONE, LOW, MODERATE, HIGH)
112
+ - anchor_size: Number of anchor examples (always evaluated)
113
+ - pool_init_size: Initial pool size
114
+ - pool_min_size: Target minimum pool size after annealing
115
+ - warmup_iters: Iterations before starting annealing
116
+ - anneal_stop_iter: Iteration when pool reaches min_size
117
+ - pool_update_period: Update informativeness every N generations
118
+ - min_evals_per_example: Min evals before computing informativeness
119
+ - k_info_prompts: Number of prompts for informativeness
120
+ - info_buffer_factor: Buffer factor (0.0-1.0) for preserving info
121
+ - info_epsilon: Epsilon for informativeness calculations
122
+ - anchor_selection_method: "random" or "clustering"
123
+ - exploration_strategy: "random" or "diversity"
124
+ - heatup_trigger: "after_min_size", "immediate", or "every_N_trials_after_min"
125
+ - heatup_schedule: "repeat" or "once"
126
+ - heatup_size: Number of seeds to add during heat-up
127
+ - heatup_cooldown_trials: Trials to wait before cooling down
128
+ - heatup_reserve_pool: Optional list of seed IDs for heat-up
129
+ prefix: Prefix for error messages (e.g., "gepa.adaptive_pool" or "mipro.adaptive_pool")
130
+ errors: List to append validation errors to
131
+ """
132
+ if not isinstance(adaptive_pool_section, dict):
133
+ errors.append(f"❌ {prefix} must be a table/dict when provided")
134
+ return
135
+
136
+ # Validate level
137
+ level = adaptive_pool_section.get("level")
138
+ if level is not None:
139
+ valid_levels = {"NONE", "LOW", "MODERATE", "HIGH"}
140
+ if str(level).upper() not in valid_levels:
141
+ errors.append(
142
+ f"❌ {prefix}.level must be one of {valid_levels}, got '{level}'"
143
+ )
144
+
145
+ # Validate numeric fields
146
+ for field, min_val in [
147
+ ("anchor_size", 0),
148
+ ("pool_init_size", 0),
149
+ ("pool_min_size", 0),
150
+ ("warmup_iters", 0),
151
+ ("anneal_stop_iter", 0),
152
+ ("pool_update_period", 1),
153
+ ("min_evals_per_example", 1),
154
+ ("k_info_prompts", 0),
155
+ ]:
156
+ val = adaptive_pool_section.get(field)
157
+ if val is not None:
158
+ try:
159
+ ival = int(val)
160
+ if ival < min_val:
161
+ errors.append(f"❌ {prefix}.{field} must be >= {min_val}, got {ival}")
162
+ except (TypeError, ValueError):
163
+ errors.append(f"❌ {prefix}.{field} must be an integer, got {type(val).__name__}")
164
+
165
+ # Validate pool_init_size >= pool_min_size if both provided
166
+ pool_init = adaptive_pool_section.get("pool_init_size")
167
+ pool_min = adaptive_pool_section.get("pool_min_size")
168
+ if pool_init is not None and pool_min is not None:
169
+ try:
170
+ pool_init_int = int(pool_init)
171
+ pool_min_int = int(pool_min)
172
+ if pool_init_int < pool_min_int:
173
+ errors.append(
174
+ f"❌ {prefix}.pool_init_size ({pool_init}) must be >= pool_min_size ({pool_min})"
175
+ )
176
+ except (TypeError, ValueError):
177
+ pass # Already validated above
178
+
179
+ # Validate pool_min_size >= anchor_size if both provided
180
+ anchor_size = adaptive_pool_section.get("anchor_size")
181
+ if pool_min is not None and anchor_size is not None:
182
+ try:
183
+ pool_min_int = int(pool_min)
184
+ anchor_size_int = int(anchor_size)
185
+ if pool_min_int < anchor_size_int:
186
+ errors.append(
187
+ f"❌ {prefix}.pool_min_size ({pool_min}) must be >= anchor_size ({anchor_size})"
188
+ )
189
+ except (TypeError, ValueError):
190
+ pass # Already validated above
191
+
192
+ # Validate info_buffer_factor and info_epsilon
193
+ for field, min_val, max_val in [("info_buffer_factor", 0.0, 1.0), ("info_epsilon", 0.0, None)]:
194
+ val = adaptive_pool_section.get(field)
195
+ if val is not None:
196
+ try:
197
+ fval = float(val)
198
+ if fval < min_val:
199
+ errors.append(f"❌ {prefix}.{field} must be >= {min_val}, got {fval}")
200
+ if max_val is not None and fval > max_val:
201
+ errors.append(f"❌ {prefix}.{field} must be <= {max_val}, got {fval}")
202
+ except (TypeError, ValueError):
203
+ errors.append(f"❌ {prefix}.{field} must be numeric, got {type(val).__name__}")
204
+
205
+ # Validate string fields
206
+ anchor_method = adaptive_pool_section.get("anchor_selection_method")
207
+ if anchor_method is not None and anchor_method not in ("random", "clustering"):
208
+ errors.append(
209
+ f"❌ {prefix}.anchor_selection_method must be 'random' or 'clustering', got '{anchor_method}'"
210
+ )
211
+
212
+ exploration_strategy = adaptive_pool_section.get("exploration_strategy")
213
+ if exploration_strategy is not None and exploration_strategy not in ("random", "diversity"):
214
+ errors.append(
215
+ f"❌ {prefix}.exploration_strategy must be 'random' or 'diversity', got '{exploration_strategy}'"
216
+ )
217
+
218
+ # Validate heatup fields
219
+ heatup_trigger = adaptive_pool_section.get("heatup_trigger")
220
+ if heatup_trigger is not None and heatup_trigger not in ("after_min_size", "immediate", "every_N_trials_after_min"):
221
+ errors.append(
222
+ f"❌ {prefix}.heatup_trigger must be 'after_min_size', 'immediate', or 'every_N_trials_after_min', got '{heatup_trigger}'"
223
+ )
224
+
225
+ heatup_schedule = adaptive_pool_section.get("heatup_schedule")
226
+ if heatup_schedule is not None and heatup_schedule not in ("repeat", "once"):
227
+ errors.append(
228
+ f"❌ {prefix}.heatup_schedule must be 'repeat' or 'once', got '{heatup_schedule}'"
229
+ )
230
+
231
+ heatup_size = adaptive_pool_section.get("heatup_size")
232
+ if heatup_size is not None:
233
+ try:
234
+ if int(heatup_size) <= 0:
235
+ errors.append(f"❌ {prefix}.heatup_size must be > 0, got {heatup_size}")
236
+ except (TypeError, ValueError):
237
+ errors.append(f"❌ {prefix}.heatup_size must be an integer, got {type(heatup_size).__name__}")
238
+
239
+ heatup_cooldown_trials = adaptive_pool_section.get("heatup_cooldown_trials")
240
+ if heatup_cooldown_trials is not None:
241
+ try:
242
+ if int(heatup_cooldown_trials) < 0:
243
+ errors.append(f"❌ {prefix}.heatup_cooldown_trials must be >= 0, got {heatup_cooldown_trials}")
244
+ except (TypeError, ValueError):
245
+ errors.append(f"❌ {prefix}.heatup_cooldown_trials must be an integer, got {type(heatup_cooldown_trials).__name__}")
246
+
247
+ heatup_reserve_pool = adaptive_pool_section.get("heatup_reserve_pool")
248
+ if heatup_reserve_pool is not None:
249
+ if not isinstance(heatup_reserve_pool, list):
250
+ errors.append(f"❌ {prefix}.heatup_reserve_pool must be a list, got {type(heatup_reserve_pool).__name__}")
251
+ elif not all(isinstance(s, int) for s in heatup_reserve_pool):
252
+ errors.append(f"❌ {prefix}.heatup_reserve_pool must contain only integers")
253
+
254
+
255
+ def _validate_model_for_provider(model: str, provider: str, field_name: str, *, allow_nano: bool = False) -> list[str]:
256
+ """
257
+ Validate that a model is supported for the given provider.
258
+
259
+ Models can be specified with or without provider prefix (e.g., "gpt-4o" or "openai/gpt-4o").
260
+ The provider prefix is stripped before validation.
261
+
262
+ REJECTS gpt-5-pro explicitly (too expensive).
263
+ REJECTS nano models for proposal/mutation models (unless allow_nano=True).
264
+
265
+ Args:
266
+ model: Model name to validate
267
+ provider: Provider name (openai, groq, google)
268
+ field_name: Field name for error messages (e.g., "prompt_learning.policy.model")
269
+ allow_nano: If True, allow nano models (for policy models). If False, reject nano models.
270
+
271
+ Returns:
272
+ List of error messages (empty if valid)
273
+ """
274
+ errors: list[str] = []
275
+
276
+ if not model or not isinstance(model, str) or not model.strip():
277
+ errors.append(f"Missing or empty {field_name}")
278
+ return errors
279
+
280
+ provider_lower = provider.lower().strip()
281
+ model_lower = model.lower().strip()
282
+
283
+ # Strip provider prefix if present (e.g., "openai/gpt-4o" -> "gpt-4o")
284
+ model_without_prefix = model_lower.split("/", 1)[1] if "/" in model_lower else model_lower
285
+
286
+ # Explicitly reject gpt-5-pro (too expensive)
287
+ if model_without_prefix == "gpt-5-pro":
288
+ errors.append(
289
+ f"Model '{model}' is not supported for prompt learning (too expensive).\n"
290
+ f" gpt-5-pro is excluded due to high cost ($15/$120 per 1M tokens).\n"
291
+ f" Please use a supported model instead."
292
+ )
293
+ return errors
294
+
295
+ # Reject nano models for proposal/mutation models (unless explicitly allowed)
296
+ if not allow_nano and model_without_prefix.endswith("-nano"):
297
+ errors.append(
298
+ f"Model '{model}' is not supported for {field_name}.\n"
299
+ f" ❌ Nano models (e.g., gpt-4.1-nano, gpt-5-nano) are NOT allowed for proposal/mutation models.\n"
300
+ f" \n"
301
+ f" Why?\n"
302
+ f" Proposal and mutation models need to be SMART and capable of generating high-quality,\n"
303
+ f" creative prompt variations. Nano models are too small and lack the reasoning capability\n"
304
+ f" needed for effective prompt optimization.\n"
305
+ f" \n"
306
+ f" ✅ Use a larger model instead:\n"
307
+ f" - For OpenAI: gpt-4.1-mini, gpt-4o-mini, gpt-4o, or gpt-4.1\n"
308
+ f" - For Groq: openai/gpt-oss-120b, llama-3.3-70b-versatile\n"
309
+ f" - For Google: gemini-2.5-flash, gemini-2.5-pro\n"
310
+ f" \n"
311
+ f" Note: Nano models ARE allowed for policy models (task execution), but NOT for\n"
312
+ f" proposal/mutation models (prompt generation)."
313
+ )
314
+ return errors
315
+
316
+ if provider_lower == "openai":
317
+ if not _is_supported_openai_model(model_without_prefix):
318
+ errors.append(
319
+ f"Unsupported OpenAI model: '{model}'\n"
320
+ f" Supported OpenAI models for prompt learning:\n"
321
+ f" - gpt-4o\n"
322
+ f" - gpt-4o-mini\n"
323
+ f" - gpt-4.1, gpt-4.1-mini, gpt-4.1-nano\n"
324
+ f" - gpt-5, gpt-5-mini, gpt-5-nano\n"
325
+ f" Note: gpt-5-pro is excluded (too expensive)\n"
326
+ f" Got: '{model}'"
327
+ )
328
+ elif provider_lower == "groq":
329
+ # For Groq, check both with and without prefix since models can be "openai/gpt-oss-20b"
330
+ if not _is_supported_groq_model(model_lower):
331
+ errors.append(
332
+ f"Unsupported Groq model: '{model}'\n"
333
+ f" Supported Groq models for prompt learning:\n"
334
+ f" - gpt-oss-Xb (e.g., gpt-oss-20b, openai/gpt-oss-120b)\n"
335
+ f" - llama-3.3-70b (and variants like llama-3.3-70b-versatile)\n"
336
+ f" - llama-3.1-8b-instant\n"
337
+ f" - qwen/qwen3-32b (and variants)\n"
338
+ f" Got: '{model}'"
339
+ )
340
+ elif provider_lower == "google":
341
+ if not _is_supported_google_model(model_without_prefix):
342
+ errors.append(
343
+ f"Unsupported Google/Gemini model: '{model}'\n"
344
+ f" Supported Google models for prompt learning:\n"
345
+ f" - gemini-2.5-pro, gemini-2.5-pro-gt200k\n"
346
+ f" - gemini-2.5-flash\n"
347
+ f" - gemini-2.5-flash-lite\n"
348
+ f" Got: '{model}'"
349
+ )
350
+ else:
351
+ errors.append(
352
+ f"Unsupported provider: '{provider}'\n"
353
+ f" Supported providers for prompt learning: 'openai', 'groq', 'google'\n"
354
+ f" Got: '{provider}'"
355
+ )
356
+
357
+ return errors
358
+
359
+
360
+ def validate_prompt_learning_config(config_data: dict[str, Any], config_path: Path) -> None:
361
+ """
362
+ Validate prompt learning config BEFORE sending to backend.
363
+
364
+ This catches common errors early with clear messages instead of cryptic backend errors.
365
+
366
+ Args:
367
+ config_data: Parsed TOML/JSON config
368
+ config_path: Path to config file (for error messages)
369
+
370
+ Raises:
371
+ ConfigValidationError: If config is invalid
372
+ click.ClickException: If validation fails (for CLI)
373
+ """
374
+ ctx: dict[str, Any] = {"config_path": str(config_path)}
375
+ log_info("validate_prompt_learning_config invoked", ctx=ctx)
376
+ errors: list[str] = []
377
+
378
+ # Run unknown field validation (warnings only, doesn't raise)
379
+ try:
380
+ validation_result = _validate_unknown_fields(config_data, config_path=config_path)
381
+ # Print warnings about unknown fields and deprecated sections
382
+ for warning_msg in validation_result.warnings:
383
+ warnings.warn(warning_msg, UserWarning, stacklevel=3)
384
+ except Exception:
385
+ # Don't fail validation if unknown field check fails
386
+ pass
387
+
388
+ # Check for prompt_learning section
389
+ pl_section = config_data.get("prompt_learning")
390
+ if not pl_section:
391
+ errors.append(
392
+ "Missing [prompt_learning] section in config. "
393
+ "Expected: [prompt_learning] with algorithm, task_app_url, etc."
394
+ )
395
+ _raise_validation_errors(errors, config_path)
396
+ return
397
+
398
+ if not isinstance(pl_section, dict):
399
+ errors.append(
400
+ f"[prompt_learning] must be a table/dict, got {type(pl_section).__name__}"
401
+ )
402
+ _raise_validation_errors(errors, config_path)
403
+ return
404
+
405
+ # CRITICAL: Validate algorithm field
406
+ algorithm = pl_section.get("algorithm")
407
+ if not algorithm:
408
+ errors.append(
409
+ "Missing required field: prompt_learning.algorithm\n"
410
+ " Must be one of: 'gepa', 'mipro'\n"
411
+ " Example:\n"
412
+ " [prompt_learning]\n"
413
+ " algorithm = \"gepa\""
414
+ )
415
+ elif algorithm not in ("gepa", "mipro"):
416
+ errors.append(
417
+ f"Invalid algorithm: '{algorithm}'\n"
418
+ f" Must be one of: 'gepa', 'mipro'\n"
419
+ f" Got: '{algorithm}'"
420
+ )
421
+
422
+ # Validate task_app_url
423
+ task_app_url = pl_section.get("task_app_url")
424
+ if not task_app_url:
425
+ errors.append(
426
+ "Missing required field: prompt_learning.task_app_url\n"
427
+ " Example:\n"
428
+ " task_app_url = \"http://127.0.0.1:8102\""
429
+ )
430
+ elif not isinstance(task_app_url, str):
431
+ errors.append(
432
+ f"task_app_url must be a string, got {type(task_app_url).__name__}"
433
+ )
434
+ elif not task_app_url.startswith(("http://", "https://")):
435
+ errors.append(
436
+ f"task_app_url must start with http:// or https://, got: '{task_app_url}'"
437
+ )
438
+
439
+ # Validate initial_prompt if present
440
+ initial_prompt = pl_section.get("initial_prompt")
441
+ if initial_prompt:
442
+ if not isinstance(initial_prompt, dict):
443
+ errors.append(
444
+ f"prompt_learning.initial_prompt must be a table/dict, got {type(initial_prompt).__name__}"
445
+ )
446
+ else:
447
+ # Validate messages array
448
+ messages = initial_prompt.get("messages")
449
+ if messages is not None:
450
+ if not isinstance(messages, list):
451
+ errors.append(
452
+ f"prompt_learning.initial_prompt.messages must be an array, got {type(messages).__name__}"
453
+ )
454
+ elif len(messages) == 0:
455
+ errors.append(
456
+ "prompt_learning.initial_prompt.messages is empty (must have at least one message)"
457
+ )
458
+
459
+ # Validate policy config
460
+ policy = pl_section.get("policy")
461
+ if not policy or not isinstance(policy, dict):
462
+ errors.append("Missing [prompt_learning.policy] section or not a table")
463
+ else:
464
+ # Enforce inference_mode
465
+ mode = str(policy.get("inference_mode", "")).strip().lower()
466
+ if not mode:
467
+ errors.append("Missing required field: prompt_learning.policy.inference_mode (must be 'synth_hosted')")
468
+ elif mode != "synth_hosted":
469
+ errors.append("prompt_learning.policy.inference_mode must be 'synth_hosted' (bring_your_own unsupported)")
470
+ # Required fields for synth_hosted
471
+ provider = (policy.get("provider") or "").strip()
472
+ model = (policy.get("model") or "").strip()
473
+ if not provider:
474
+ errors.append("Missing required field: prompt_learning.policy.provider")
475
+ if not model:
476
+ errors.append("Missing required field: prompt_learning.policy.model")
477
+ else:
478
+ # Validate model is supported for the provider
479
+ if provider:
480
+ errors.extend(_validate_model_for_provider(
481
+ model, provider, "prompt_learning.policy.model", allow_nano=True
482
+ ))
483
+ # VALIDATION: Reject inference_url in config - trainer must provide it in rollout requests
484
+ if "inference_url" in policy:
485
+ errors.append(
486
+ "inference_url must not be specified in [prompt_learning.policy]. "
487
+ "The trainer provides the inference URL in rollout requests. "
488
+ "Remove inference_url from your config file."
489
+ )
490
+ if "api_base" in policy:
491
+ errors.append(
492
+ "api_base must not be specified in [prompt_learning.policy]. "
493
+ "The trainer provides the inference URL in rollout requests. "
494
+ "Remove api_base from your config file."
495
+ )
496
+ if "base_url" in policy:
497
+ errors.append(
498
+ "base_url must not be specified in [prompt_learning.policy]. "
499
+ "The trainer provides the inference URL in rollout requests. "
500
+ "Remove base_url from your config file."
501
+ )
502
+
503
+ # Validate proxy_models config (can be at top-level or algorithm-specific)
504
+ proxy_models_section = pl_section.get("proxy_models")
505
+ if proxy_models_section:
506
+ if not isinstance(proxy_models_section, dict):
507
+ errors.append(f"prompt_learning.proxy_models must be a table/dict, got {type(proxy_models_section).__name__}")
508
+ else:
509
+ required_fields = ["hi_provider", "hi_model", "lo_provider", "lo_model"]
510
+ for field in required_fields:
511
+ if not proxy_models_section.get(field):
512
+ errors.append(f"prompt_learning.proxy_models.{field} is required")
513
+ # Validate numeric fields
514
+ for field, min_val in [("n_min_hi", 0), ("r2_thresh", 0.0), ("r2_stop", 0.0), ("sigma_max", 0.0), ("sigma_stop", 0.0), ("verify_every", 0)]:
515
+ val = proxy_models_section.get(field)
516
+ if val is not None:
517
+ try:
518
+ if field in ("r2_thresh", "r2_stop"):
519
+ fval = float(val)
520
+ if not (0.0 <= fval <= 1.0):
521
+ errors.append(f"prompt_learning.proxy_models.{field} must be between 0.0 and 1.0, got {fval}")
522
+ elif field.startswith("sigma"):
523
+ fval = float(val)
524
+ if fval < min_val:
525
+ errors.append(f"prompt_learning.proxy_models.{field} must be >= {min_val}, got {fval}")
526
+ else:
527
+ ival = int(val)
528
+ if ival < min_val:
529
+ errors.append(f"prompt_learning.proxy_models.{field} must be >= {min_val}, got {ival}")
530
+ except (TypeError, ValueError):
531
+ errors.append(f"prompt_learning.proxy_models.{field} must be numeric, got {type(val).__name__}")
532
+ # Validate provider/model combinations
533
+ if proxy_models_section.get("hi_provider") and proxy_models_section.get("hi_model"):
534
+ hi_errors = _validate_model_for_provider(
535
+ proxy_models_section["hi_model"],
536
+ proxy_models_section["hi_provider"],
537
+ "prompt_learning.proxy_models.hi_model",
538
+ allow_nano=True,
539
+ )
540
+ errors.extend(hi_errors)
541
+ if proxy_models_section.get("lo_provider") and proxy_models_section.get("lo_model"):
542
+ lo_errors = _validate_model_for_provider(
543
+ proxy_models_section["lo_model"],
544
+ proxy_models_section["lo_provider"],
545
+ "prompt_learning.proxy_models.lo_model",
546
+ allow_nano=True,
547
+ )
548
+ errors.extend(lo_errors)
549
+
550
+ # Validate judge config (shared by GEPA and MIPRO)
551
+ judge_section = pl_section.get("judge") or {}
552
+ if judge_section:
553
+ if not isinstance(judge_section, dict):
554
+ errors.append(f"prompt_learning.judge must be a table/dict, got {type(judge_section).__name__}")
555
+ else:
556
+ reward_source = str(judge_section.get("reward_source", "task_app")).strip().lower()
557
+ enabled = bool(judge_section.get("enabled"))
558
+ if reward_source and reward_source not in {"task_app", "judge", "fused"}:
559
+ errors.append("prompt_learning.judge.reward_source must be 'task_app', 'judge', or 'fused'")
560
+ backend_base = str(judge_section.get("backend_base", "") or "").strip()
561
+ backend_provider = str(judge_section.get("backend_provider", "") or "").strip()
562
+ backend_model = str(judge_section.get("backend_model", "") or "").strip()
563
+ if enabled:
564
+ pass
565
+ if reward_source == "fused":
566
+ weight_event = judge_section.get("weight_event", 0.0)
567
+ weight_outcome = judge_section.get("weight_outcome", 0.0)
568
+ try:
569
+ weight_event_f = float(weight_event)
570
+ except (TypeError, ValueError):
571
+ errors.append("prompt_learning.judge.weight_event must be numeric")
572
+ weight_event_f = 0.0
573
+ try:
574
+ weight_outcome_f = float(weight_outcome)
575
+ except (TypeError, ValueError):
576
+ errors.append("prompt_learning.judge.weight_outcome must be numeric")
577
+ weight_outcome_f = 0.0
578
+ if weight_event_f <= 0 and weight_outcome_f <= 0:
579
+ errors.append(
580
+ "prompt_learning.judge.reward_source='fused' requires weight_event > 0 or weight_outcome > 0"
581
+ )
582
+
583
+ # Check for multi-stage/multi-module pipeline config
584
+ initial_prompt = pl_section.get("initial_prompt", {})
585
+ pipeline_modules: list[str | dict[str, Any]] = []
586
+ if isinstance(initial_prompt, dict):
587
+ metadata = initial_prompt.get("metadata", {})
588
+ pipeline_modules = metadata.get("pipeline_modules", [])
589
+ if not isinstance(pipeline_modules, list):
590
+ pipeline_modules = []
591
+ has_multi_stage = isinstance(pipeline_modules, list) and len(pipeline_modules) > 0
592
+
593
+ # Validate algorithm-specific config
594
+ if algorithm == "gepa":
595
+ gepa_config = pl_section.get("gepa")
596
+ if not gepa_config or not isinstance(gepa_config, dict):
597
+ errors.append("Missing [prompt_learning.gepa] section for GEPA algorithm")
598
+ else:
599
+ # Multi-stage validation
600
+ modules_config = gepa_config.get("modules")
601
+ if has_multi_stage:
602
+ if not modules_config or not isinstance(modules_config, list) or len(modules_config) == 0:
603
+ errors.append(
604
+ f"GEPA multi-stage pipeline detected (found {len(pipeline_modules)} modules in "
605
+ f"prompt_learning.initial_prompt.metadata.pipeline_modules), "
606
+ f"but [prompt_learning.gepa.modules] is missing or empty. "
607
+ f"Define module configs for each pipeline stage."
608
+ )
609
+ else:
610
+ # Validate module IDs match pipeline_modules
611
+ module_ids = []
612
+ for m in modules_config:
613
+ if isinstance(m, dict):
614
+ module_id = m.get("module_id") or m.get("stage_id")
615
+ if module_id:
616
+ module_ids.append(str(module_id).strip())
617
+ elif hasattr(m, "module_id"):
618
+ module_ids.append(str(m.module_id).strip())
619
+ elif hasattr(m, "stage_id"):
620
+ module_ids.append(str(m.stage_id).strip())
621
+
622
+ # Extract pipeline module names (can be strings or dicts with 'name' field)
623
+ pipeline_module_names = []
624
+ for m in pipeline_modules:
625
+ if isinstance(m, str):
626
+ pipeline_module_names.append(m.strip())
627
+ elif isinstance(m, dict):
628
+ name = m.get("name") or m.get("module_id") or m.get("stage_id")
629
+ if name:
630
+ pipeline_module_names.append(str(name).strip())
631
+
632
+ # Check for missing modules
633
+ missing_modules = set(pipeline_module_names) - set(module_ids)
634
+ if missing_modules:
635
+ errors.append(
636
+ f"Pipeline modules {sorted(missing_modules)} are missing from "
637
+ f"[prompt_learning.gepa.modules]. Each pipeline module must have a corresponding "
638
+ f"module config with matching module_id."
639
+ )
640
+
641
+ # Check for extra modules (warn but don't error)
642
+ extra_modules = set(module_ids) - set(pipeline_module_names)
643
+ if extra_modules:
644
+ # This is a warning, not an error - extra modules are allowed
645
+ pass
646
+
647
+ # Numeric sanity checks
648
+ def _pos_int(name: str) -> None:
649
+ val = gepa_config.get(name)
650
+ if val is not None:
651
+ try:
652
+ ival = int(val)
653
+ if ival <= 0:
654
+ errors.append(f"prompt_learning.gepa.{name} must be > 0")
655
+ except Exception:
656
+ errors.append(f"prompt_learning.gepa.{name} must be an integer")
657
+
658
+ def _pos_int_nested(section: str, name: str) -> None:
659
+ """Check positive int in nested section."""
660
+ section_config = gepa_config.get(section)
661
+ if section_config and isinstance(section_config, dict):
662
+ val = section_config.get(name)
663
+ if val is not None:
664
+ try:
665
+ ival = int(val)
666
+ if ival <= 0:
667
+ errors.append(f"prompt_learning.gepa.{section}.{name} must be > 0")
668
+ except Exception:
669
+ errors.append(f"prompt_learning.gepa.{section}.{name} must be an integer")
670
+
671
+ def _non_neg_int(name: str) -> None:
672
+ """Check non-negative int."""
673
+ val = gepa_config.get(name)
674
+ if val is not None:
675
+ try:
676
+ ival = int(val)
677
+ if ival < 0:
678
+ errors.append(f"prompt_learning.gepa.{name} must be >= 0")
679
+ except Exception:
680
+ errors.append(f"prompt_learning.gepa.{name} must be an integer")
681
+
682
+ def _rate_float(name: str) -> None:
683
+ """Check float in [0.0, 1.0] range."""
684
+ val = gepa_config.get(name)
685
+ if val is not None:
686
+ try:
687
+ fval = float(val)
688
+ if not (0.0 <= fval <= 1.0):
689
+ errors.append(f"prompt_learning.gepa.{name} must be between 0.0 and 1.0")
690
+ except Exception:
691
+ errors.append(f"prompt_learning.gepa.{name} must be numeric")
692
+
693
+ def _pos_float(name: str) -> None:
694
+ """Check positive float."""
695
+ val = gepa_config.get(name)
696
+ if val is not None:
697
+ try:
698
+ fval = float(val)
699
+ if fval <= 0:
700
+ errors.append(f"prompt_learning.gepa.{name} must be > 0")
701
+ except Exception:
702
+ errors.append(f"prompt_learning.gepa.{name} must be numeric")
703
+
704
+ # Required positive integers
705
+ for fld in ("initial_population_size", "num_generations", "children_per_generation", "max_concurrent_rollouts"):
706
+ _pos_int(fld)
707
+
708
+ # Nested rollout config validation
709
+ _pos_int_nested("rollout", "budget")
710
+ _pos_int_nested("rollout", "max_concurrent")
711
+ _pos_int_nested("rollout", "minibatch_size")
712
+
713
+ # Nested population config validation
714
+ _pos_int_nested("population", "initial_size")
715
+ _pos_int_nested("population", "num_generations")
716
+ _pos_int_nested("population", "children_per_generation")
717
+ _rate_float("mutation_rate") # Can be at top level or in mutation section
718
+ _rate_float("crossover_rate") # Can be at top level or in population section
719
+ _pos_float("selection_pressure") # Must be >= 1.0
720
+ selection_pressure = gepa_config.get("selection_pressure")
721
+ if selection_pressure is not None:
722
+ try:
723
+ sp = float(selection_pressure)
724
+ if sp < 1.0:
725
+ errors.append("prompt_learning.gepa.selection_pressure must be >= 1.0")
726
+ except Exception:
727
+ pass # Already caught by type check
728
+ _non_neg_int("patience_generations")
729
+
730
+ # Nested archive config validation
731
+ _pos_int_nested("archive", "size")
732
+ _pos_int_nested("archive", "pareto_set_size")
733
+ _pos_float("pareto_eps") # Must be > 0, typically very small
734
+ _rate_float("feedback_fraction")
735
+
736
+ # Nested mutation config validation
737
+ mutation_config = gepa_config.get("mutation")
738
+ if mutation_config and isinstance(mutation_config, dict):
739
+ _rate_float("mutation_rate") # Check in mutation section too
740
+ mutation_model = mutation_config.get("llm_model")
741
+ mutation_provider = mutation_config.get("llm_provider", "").strip()
742
+ if mutation_model:
743
+ if not mutation_provider:
744
+ errors.append(
745
+ "Missing required field: prompt_learning.gepa.mutation.llm_provider\n"
746
+ " Required when prompt_learning.gepa.mutation.llm_model is set"
747
+ )
748
+ else:
749
+ errors.extend(_validate_model_for_provider(
750
+ mutation_model, mutation_provider, "prompt_learning.gepa.mutation.llm_model", allow_nano=False
751
+ ))
752
+
753
+ # Top-level mutation_rate and crossover_rate (if not in nested sections)
754
+ if not (mutation_config and isinstance(mutation_config, dict) and "rate" in mutation_config):
755
+ _rate_float("mutation_rate")
756
+ population_config = gepa_config.get("population")
757
+ if not (population_config and isinstance(population_config, dict) and "crossover_rate" in population_config):
758
+ _rate_float("crossover_rate")
759
+
760
+ # Budget cap
761
+ max_spend = gepa_config.get("max_spend_usd")
762
+ if max_spend is not None:
763
+ try:
764
+ f = float(max_spend)
765
+ if f <= 0:
766
+ errors.append("prompt_learning.gepa.max_spend_usd must be > 0 when provided")
767
+ except (ValueError, TypeError):
768
+ errors.append("prompt_learning.gepa.max_spend_usd must be numeric")
769
+
770
+ # Rollout budget validation
771
+ rollout_config = gepa_config.get("rollout")
772
+ rollout_budget = None
773
+ if rollout_config and isinstance(rollout_config, dict):
774
+ rollout_budget = rollout_config.get("budget")
775
+ if rollout_budget is None:
776
+ rollout_budget = gepa_config.get("rollout_budget")
777
+ if rollout_budget is not None:
778
+ try:
779
+ rb = int(rollout_budget)
780
+ if rb <= 0:
781
+ errors.append("prompt_learning.gepa.rollout.budget (or rollout_budget) must be > 0 when provided")
782
+ except Exception:
783
+ errors.append("prompt_learning.gepa.rollout.budget (or rollout_budget) must be an integer")
784
+
785
+ # Minibatch size validation
786
+ minibatch_size = None
787
+ if rollout_config and isinstance(rollout_config, dict):
788
+ minibatch_size = rollout_config.get("minibatch_size")
789
+ if minibatch_size is None:
790
+ minibatch_size = gepa_config.get("minibatch_size")
791
+ if minibatch_size is not None:
792
+ try:
793
+ mbs = int(minibatch_size)
794
+ if mbs <= 0:
795
+ errors.append("prompt_learning.gepa.rollout.minibatch_size (or minibatch_size) must be > 0")
796
+ except Exception:
797
+ errors.append("prompt_learning.gepa.rollout.minibatch_size (or minibatch_size) must be an integer")
798
+
799
+ # Proposer type validation
800
+ proposer_type = gepa_config.get("proposer_type", "dspy")
801
+ if proposer_type not in ("dspy", "spec", "synth", "gepa-ai"):
802
+ errors.append(
803
+ f"Invalid proposer_type: '{proposer_type}'\n"
804
+ f" Must be one of: 'dspy', 'spec', 'synth', 'gepa-ai'\n"
805
+ f" Got: '{proposer_type}'"
806
+ )
807
+
808
+ # Proposer effort validation
809
+ proposer_effort = str(gepa_config.get("proposer_effort", "LOW")).upper()
810
+ valid_effort_levels = {"LOW_CONTEXT", "LOW", "MEDIUM", "HIGH"}
811
+ if proposer_effort not in valid_effort_levels:
812
+ errors.append(
813
+ f"Invalid proposer_effort: '{proposer_effort}'\n"
814
+ f" Must be one of: {', '.join(sorted(valid_effort_levels))}\n"
815
+ f" Got: '{proposer_effort}'"
816
+ )
817
+
818
+ # Proposer output tokens validation
819
+ proposer_output_tokens = str(gepa_config.get("proposer_output_tokens", "FAST")).upper()
820
+ valid_output_tokens = {"RAPID", "FAST", "SLOW"}
821
+ if proposer_output_tokens not in valid_output_tokens:
822
+ errors.append(
823
+ f"Invalid proposer_output_tokens: '{proposer_output_tokens}'\n"
824
+ f" Must be one of: {', '.join(sorted(valid_output_tokens))}\n"
825
+ f" Got: '{proposer_output_tokens}'"
826
+ )
827
+
828
+ # Note: RAPID can now be used with any proposer_effort level (5000 tokens)
829
+
830
+ # Spec validation when proposer_type is "spec"
831
+ if proposer_type == "spec":
832
+ spec_path = gepa_config.get("spec_path")
833
+ if not spec_path:
834
+ errors.append(
835
+ "Missing required field: prompt_learning.gepa.spec_path\n"
836
+ " Required when proposer_type='spec'\n"
837
+ " Example:\n"
838
+ " [prompt_learning.gepa]\n"
839
+ " proposer_type = \"spec\"\n"
840
+ " spec_path = \"examples/task_apps/banking77/banking77_spec.json\""
841
+ )
842
+ else:
843
+ # Validate spec_max_tokens if provided
844
+ spec_max_tokens = gepa_config.get("spec_max_tokens")
845
+ if spec_max_tokens is not None:
846
+ try:
847
+ smt = int(spec_max_tokens)
848
+ if smt <= 0:
849
+ errors.append("prompt_learning.gepa.spec_max_tokens must be > 0")
850
+ except Exception:
851
+ errors.append("prompt_learning.gepa.spec_max_tokens must be an integer")
852
+
853
+ # Validate spec_priority_threshold if provided
854
+ spec_priority_threshold = gepa_config.get("spec_priority_threshold")
855
+ if spec_priority_threshold is not None:
856
+ try:
857
+ spt = int(spec_priority_threshold)
858
+ if spt < 0:
859
+ errors.append("prompt_learning.gepa.spec_priority_threshold must be >= 0")
860
+ except Exception:
861
+ errors.append("prompt_learning.gepa.spec_priority_threshold must be an integer")
862
+
863
+ # Archive size validation
864
+ archive_config = gepa_config.get("archive")
865
+ archive_size = None
866
+ if archive_config and isinstance(archive_config, dict):
867
+ archive_size = archive_config.get("size")
868
+ if archive_size is None:
869
+ archive_size = gepa_config.get("archive_size")
870
+ if archive_size is not None:
871
+ try:
872
+ asize = int(archive_size)
873
+ if asize <= 0:
874
+ errors.append("prompt_learning.gepa.archive.size (or archive_size) must be > 0")
875
+ except Exception:
876
+ errors.append("prompt_learning.gepa.archive.size (or archive_size) must be an integer")
877
+
878
+ # CRITICAL: Validate pareto_set_size vs seeds BEFORE submitting to backend
879
+ # This catches config errors immediately instead of after job submission
880
+ eval_config = gepa_config.get("evaluation")
881
+ if eval_config and isinstance(eval_config, dict):
882
+ train_seeds = eval_config.get("seeds") or eval_config.get("train_seeds")
883
+ if train_seeds and isinstance(train_seeds, list) and len(train_seeds) > 0:
884
+ total_seeds = len(train_seeds)
885
+
886
+ # Get pareto_set_size (can be in archive section or top-level)
887
+ pareto_set_size = None
888
+ if archive_config and isinstance(archive_config, dict):
889
+ pareto_set_size = archive_config.get("pareto_set_size")
890
+ if pareto_set_size is None:
891
+ pareto_set_size = gepa_config.get("pareto_set_size", 64) # Default from backend
892
+
893
+ try:
894
+ pareto_count = int(pareto_set_size)
895
+ feedback_fraction = 0.5 # Default
896
+ if archive_config and isinstance(archive_config, dict):
897
+ feedback_fraction = archive_config.get("feedback_fraction", 0.5)
898
+ if feedback_fraction is None:
899
+ feedback_fraction = gepa_config.get("feedback_fraction", 0.5)
900
+ feedback_fraction = float(feedback_fraction)
901
+
902
+ # Calculate split
903
+ feedback_count = total_seeds - pareto_count
904
+
905
+ # Constants matching backend
906
+ min_pareto_set_size = 10
907
+ min_feedback_seeds = 3
908
+
909
+ # Validate pareto_set_size <= total_seeds
910
+ if pareto_count > total_seeds:
911
+ errors.append(
912
+ f"CONFIG ERROR: pareto_set_size={pareto_count} > total_seeds={total_seeds}. "
913
+ f"Increase [prompt_learning.gepa.evaluation].seeds or decrease "
914
+ f"[prompt_learning.gepa.archive].pareto_set_size. "
915
+ f"Seeds: {train_seeds[:10]}{'...' if len(train_seeds) > 10 else ''}"
916
+ )
917
+
918
+ # Validate pareto_set_size >= min_pareto_set_size
919
+ if pareto_count < min_pareto_set_size:
920
+ errors.append(
921
+ f"CONFIG ERROR: pareto_set_size={pareto_count} < MIN_PARETO_SET_SIZE={min_pareto_set_size}. "
922
+ f"Increase [prompt_learning.gepa.archive].pareto_set_size to at least {min_pareto_set_size}. "
923
+ f"Below this threshold, accuracy estimates are too noisy for reliable optimization."
924
+ )
925
+
926
+ # Validate feedback_count >= min_feedback_seeds
927
+ if feedback_count < min_feedback_seeds:
928
+ errors.append(
929
+ f"CONFIG ERROR: feedback_count={feedback_count} < MIN_FEEDBACK_SEEDS={min_feedback_seeds}. "
930
+ f"Increase total seeds or decrease pareto_set_size to ensure at least {min_feedback_seeds} feedback seeds. "
931
+ f"Below this threshold, reflection prompts lack sufficient diversity."
932
+ )
933
+ except (ValueError, TypeError):
934
+ pass # Type errors already caught by _pos_int_nested above
935
+
936
+ # Pareto eps validation
937
+ pareto_eps = None
938
+ if archive_config and isinstance(archive_config, dict):
939
+ pareto_eps = archive_config.get("pareto_eps")
940
+ if pareto_eps is None:
941
+ pareto_eps = gepa_config.get("pareto_eps")
942
+ if pareto_eps is not None:
943
+ try:
944
+ pe = float(pareto_eps)
945
+ if pe <= 0:
946
+ errors.append("prompt_learning.gepa.archive.pareto_eps (or pareto_eps) must be > 0")
947
+ elif pe >= 1.0:
948
+ errors.append("prompt_learning.gepa.archive.pareto_eps (or pareto_eps) should be < 1.0 (typically 1e-6)")
949
+ except Exception:
950
+ errors.append("prompt_learning.gepa.archive.pareto_eps (or pareto_eps) must be numeric")
951
+
952
+ # Feedback fraction validation
953
+ feedback_fraction = None
954
+ if archive_config and isinstance(archive_config, dict):
955
+ feedback_fraction = archive_config.get("feedback_fraction")
956
+ if feedback_fraction is None:
957
+ feedback_fraction = gepa_config.get("feedback_fraction")
958
+ if feedback_fraction is not None:
959
+ try:
960
+ ff = float(feedback_fraction)
961
+ if not (0.0 <= ff <= 1.0):
962
+ errors.append("prompt_learning.gepa.archive.feedback_fraction (or feedback_fraction) must be between 0.0 and 1.0")
963
+ except Exception:
964
+ errors.append("prompt_learning.gepa.archive.feedback_fraction (or feedback_fraction) must be numeric")
965
+
966
+ # Token counting model validation (should be a valid model name)
967
+ token_config = gepa_config.get("token")
968
+ token_counting_model = None
969
+ if token_config and isinstance(token_config, dict):
970
+ token_counting_model = token_config.get("counting_model")
971
+ if token_counting_model is None:
972
+ token_counting_model = gepa_config.get("token_counting_model")
973
+ if token_counting_model and (not isinstance(token_counting_model, str) or not token_counting_model.strip()):
974
+ # Basic validation - should be a non-empty string
975
+ errors.append("prompt_learning.gepa.token.counting_model (or token_counting_model) must be a non-empty string")
976
+
977
+ # Module/stage validation for multi-stage
978
+ if has_multi_stage:
979
+ modules_config = gepa_config.get("modules")
980
+ if modules_config and isinstance(modules_config, list):
981
+ for idx, module_entry in enumerate(modules_config):
982
+ if isinstance(module_entry, dict):
983
+ module_id = module_entry.get("module_id") or module_entry.get("stage_id") or f"module_{idx}"
984
+ max_instruction_slots = module_entry.get("max_instruction_slots")
985
+ max_tokens = module_entry.get("max_tokens")
986
+ allowed_tools = module_entry.get("allowed_tools")
987
+
988
+ # Validate max_instruction_slots
989
+ if max_instruction_slots is not None:
990
+ try:
991
+ mis = int(max_instruction_slots)
992
+ if mis < 1:
993
+ errors.append(
994
+ f"prompt_learning.gepa.modules[{idx}].max_instruction_slots must be >= 1"
995
+ )
996
+ except Exception:
997
+ errors.append(
998
+ f"prompt_learning.gepa.modules[{idx}].max_instruction_slots must be an integer"
999
+ )
1000
+
1001
+ # Validate max_tokens
1002
+ if max_tokens is not None:
1003
+ try:
1004
+ mt = int(max_tokens)
1005
+ if mt <= 0:
1006
+ errors.append(
1007
+ f"prompt_learning.gepa.modules[{idx}].max_tokens must be > 0"
1008
+ )
1009
+ except Exception:
1010
+ errors.append(
1011
+ f"prompt_learning.gepa.modules[{idx}].max_tokens must be an integer"
1012
+ )
1013
+
1014
+ # Validate allowed_tools
1015
+ if allowed_tools is not None:
1016
+ if not isinstance(allowed_tools, list):
1017
+ errors.append(
1018
+ f"prompt_learning.gepa.modules[{idx}].allowed_tools must be a list"
1019
+ )
1020
+ else:
1021
+ if len(allowed_tools) == 0:
1022
+ errors.append(
1023
+ f"prompt_learning.gepa.modules[{idx}].allowed_tools cannot be empty (use null/omit to allow all tools)"
1024
+ )
1025
+ else:
1026
+ # Check for duplicates
1027
+ seen_tools = set()
1028
+ for tool_idx, tool in enumerate(allowed_tools):
1029
+ if not isinstance(tool, str):
1030
+ errors.append(
1031
+ f"prompt_learning.gepa.modules[{idx}].allowed_tools[{tool_idx}] must be a string"
1032
+ )
1033
+ elif not tool.strip():
1034
+ errors.append(
1035
+ f"prompt_learning.gepa.modules[{idx}].allowed_tools[{tool_idx}] cannot be empty"
1036
+ )
1037
+ elif tool.strip() in seen_tools:
1038
+ errors.append(
1039
+ f"prompt_learning.gepa.modules[{idx}].allowed_tools contains duplicate '{tool.strip()}'"
1040
+ )
1041
+ else:
1042
+ seen_tools.add(tool.strip())
1043
+
1044
+ # Validate per-module policy config (REQUIRED)
1045
+ module_policy = module_entry.get("policy")
1046
+ if module_policy is None:
1047
+ errors.append(
1048
+ f"❌ gepa.modules[{idx}]: [policy] table is REQUIRED. "
1049
+ f"Each module must have its own policy configuration with 'model' and 'provider' fields."
1050
+ )
1051
+ elif not isinstance(module_policy, dict):
1052
+ errors.append(
1053
+ f"❌ gepa.modules[{idx}]: [policy] must be a table/dict, got {type(module_policy).__name__}"
1054
+ )
1055
+ else:
1056
+ # Validate required fields in module policy
1057
+ if not module_policy.get("model"):
1058
+ errors.append(
1059
+ f"❌ gepa.modules[{idx}]: [policy].model is required"
1060
+ )
1061
+ if not module_policy.get("provider"):
1062
+ errors.append(
1063
+ f"❌ gepa.modules[{idx}]: [policy].provider is required"
1064
+ )
1065
+ # Validate model/provider combination
1066
+ module_model = module_policy.get("model")
1067
+ module_provider = module_policy.get("provider")
1068
+ if module_model and module_provider:
1069
+ errors.extend(_validate_model_for_provider(
1070
+ module_model, module_provider,
1071
+ f"prompt_learning.gepa.modules[{idx}].policy.model",
1072
+ allow_nano=True, # Policy models can be nano
1073
+ ))
1074
+ # Reject inference_url in module policy (trainer provides it)
1075
+ if "inference_url" in module_policy:
1076
+ errors.append(
1077
+ f"❌ gepa.modules[{idx}]: [policy].inference_url must not be specified. "
1078
+ f"The trainer provides the inference URL in rollout requests. Remove inference_url from module policy."
1079
+ )
1080
+ if "api_base" in module_policy:
1081
+ errors.append(
1082
+ f"❌ gepa.modules[{idx}]: [policy].api_base must not be specified. "
1083
+ f"Remove api_base from module policy."
1084
+ )
1085
+ if "base_url" in module_policy:
1086
+ errors.append(
1087
+ f"❌ gepa.modules[{idx}]: [policy].base_url must not be specified. "
1088
+ f"Remove base_url from module policy."
1089
+ )
1090
+
1091
+ elif algorithm == "mipro":
1092
+ mipro_config = pl_section.get("mipro")
1093
+ if not mipro_config or not isinstance(mipro_config, dict):
1094
+ errors.append("Missing [prompt_learning.mipro] section for MIPRO algorithm")
1095
+ else:
1096
+ # Validate required MIPRO fields
1097
+ def _pos_int(name: str) -> None:
1098
+ val = mipro_config.get(name)
1099
+ if val is not None:
1100
+ try:
1101
+ ival = int(val)
1102
+ if ival <= 0:
1103
+ errors.append(f"prompt_learning.mipro.{name} must be > 0")
1104
+ except Exception:
1105
+ errors.append(f"prompt_learning.mipro.{name} must be an integer")
1106
+
1107
+ def _non_neg_int(name: str) -> None:
1108
+ """Check non-negative int."""
1109
+ val = mipro_config.get(name)
1110
+ if val is not None:
1111
+ try:
1112
+ ival = int(val)
1113
+ if ival < 0:
1114
+ errors.append(f"prompt_learning.mipro.{name} must be >= 0")
1115
+ except Exception:
1116
+ errors.append(f"prompt_learning.mipro.{name} must be an integer")
1117
+
1118
+ def _rate_float(name: str) -> None:
1119
+ """Check float in [0.0, 1.0] range."""
1120
+ val = mipro_config.get(name)
1121
+ if val is not None:
1122
+ try:
1123
+ fval = float(val)
1124
+ if not (0.0 <= fval <= 1.0):
1125
+ errors.append(f"prompt_learning.mipro.{name} must be between 0.0 and 1.0")
1126
+ except Exception:
1127
+ errors.append(f"prompt_learning.mipro.{name} must be numeric")
1128
+
1129
+ def _pos_float(name: str) -> None:
1130
+ """Check positive float."""
1131
+ val = mipro_config.get(name)
1132
+ if val is not None:
1133
+ try:
1134
+ fval = float(val)
1135
+ if fval <= 0:
1136
+ errors.append(f"prompt_learning.mipro.{name} must be > 0")
1137
+ except Exception:
1138
+ errors.append(f"prompt_learning.mipro.{name} must be numeric")
1139
+
1140
+ # Required numeric fields
1141
+ for fld in ("num_iterations", "num_evaluations_per_iteration", "batch_size", "max_concurrent"):
1142
+ _pos_int(fld)
1143
+
1144
+ # Additional MIPRO numeric validations
1145
+ _pos_int("max_demo_set_size")
1146
+ _pos_int("max_demo_sets")
1147
+ _pos_int("max_instruction_sets")
1148
+ _pos_int("full_eval_every_k")
1149
+ _pos_int("instructions_per_batch")
1150
+ _pos_int("max_instructions")
1151
+ _pos_int("duplicate_retry_limit")
1152
+
1153
+ # Validate meta_model if set (optional - backend applies defaults)
1154
+ meta_model = mipro_config.get("meta_model")
1155
+ meta_model_provider = mipro_config.get("meta_model_provider", "").strip()
1156
+ if meta_model:
1157
+ # If meta_model is explicitly set, validate it
1158
+ if not meta_model_provider:
1159
+ errors.append(
1160
+ "Missing required field: prompt_learning.mipro.meta_model_provider\n"
1161
+ " Required when prompt_learning.mipro.meta_model is set"
1162
+ )
1163
+ else:
1164
+ errors.extend(_validate_model_for_provider(
1165
+ meta_model, meta_model_provider, "prompt_learning.mipro.meta_model", allow_nano=False
1166
+ ))
1167
+ # If meta_model is not set, backend will use defaults (llama-3.3-70b-versatile/groq)
1168
+
1169
+ # Validate meta model temperature
1170
+ meta_temperature = mipro_config.get("meta_model_temperature")
1171
+ if meta_temperature is not None:
1172
+ try:
1173
+ temp = float(meta_temperature)
1174
+ if temp < 0.0:
1175
+ errors.append("prompt_learning.mipro.meta_model_temperature must be >= 0.0")
1176
+ except Exception:
1177
+ errors.append("prompt_learning.mipro.meta_model_temperature must be numeric")
1178
+
1179
+ # Validate meta model max_tokens
1180
+ meta_max_tokens = mipro_config.get("meta_model_max_tokens")
1181
+ if meta_max_tokens is not None:
1182
+ try:
1183
+ mmt = int(meta_max_tokens)
1184
+ if mmt <= 0:
1185
+ errors.append("prompt_learning.mipro.meta_model_max_tokens must be > 0")
1186
+ except Exception:
1187
+ errors.append("prompt_learning.mipro.meta_model_max_tokens must be an integer")
1188
+
1189
+ # Validate generate_at_iterations
1190
+ generate_at_iterations = mipro_config.get("generate_at_iterations")
1191
+ if generate_at_iterations is not None:
1192
+ if not isinstance(generate_at_iterations, list):
1193
+ errors.append("prompt_learning.mipro.generate_at_iterations must be a list")
1194
+ else:
1195
+ for idx, iter_val in enumerate(generate_at_iterations):
1196
+ try:
1197
+ iter_int = int(iter_val)
1198
+ if iter_int < 0:
1199
+ errors.append(
1200
+ f"prompt_learning.mipro.generate_at_iterations[{idx}] must be >= 0"
1201
+ )
1202
+ except Exception:
1203
+ errors.append(
1204
+ f"prompt_learning.mipro.generate_at_iterations[{idx}] must be an integer"
1205
+ )
1206
+
1207
+ # Validate spec configuration
1208
+ spec_path = mipro_config.get("spec_path")
1209
+ if spec_path:
1210
+ # Validate spec_max_tokens if provided
1211
+ spec_max_tokens = mipro_config.get("spec_max_tokens")
1212
+ if spec_max_tokens is not None:
1213
+ try:
1214
+ smt = int(spec_max_tokens)
1215
+ if smt <= 0:
1216
+ errors.append("prompt_learning.mipro.spec_max_tokens must be > 0")
1217
+ except Exception:
1218
+ errors.append("prompt_learning.mipro.spec_max_tokens must be an integer")
1219
+
1220
+ # Validate spec_priority_threshold if provided
1221
+ spec_priority_threshold = mipro_config.get("spec_priority_threshold")
1222
+ if spec_priority_threshold is not None:
1223
+ try:
1224
+ spt = int(spec_priority_threshold)
1225
+ if spt < 0:
1226
+ errors.append("prompt_learning.mipro.spec_priority_threshold must be >= 0")
1227
+ except Exception:
1228
+ errors.append("prompt_learning.mipro.spec_priority_threshold must be an integer")
1229
+
1230
+ # Validate modules/stages configuration
1231
+ modules_config = mipro_config.get("modules")
1232
+ if modules_config and isinstance(modules_config, list):
1233
+ max_instruction_sets = mipro_config.get("max_instruction_sets", 128)
1234
+ max_demo_sets = mipro_config.get("max_demo_sets", 128)
1235
+ seen_module_ids = set()
1236
+ seen_stage_ids = set()
1237
+
1238
+ for module_idx, module_entry in enumerate(modules_config):
1239
+ if not isinstance(module_entry, dict):
1240
+ errors.append(
1241
+ f"prompt_learning.mipro.modules[{module_idx}] must be a table/dict"
1242
+ )
1243
+ continue
1244
+
1245
+ module_id = module_entry.get("module_id") or module_entry.get("id") or f"module_{module_idx}"
1246
+ if module_id in seen_module_ids:
1247
+ errors.append(
1248
+ f"Duplicate module_id '{module_id}' in prompt_learning.mipro.modules"
1249
+ )
1250
+ seen_module_ids.add(module_id)
1251
+
1252
+ # Validate stages
1253
+ stages = module_entry.get("stages")
1254
+ if stages is not None:
1255
+ if not isinstance(stages, list):
1256
+ errors.append(
1257
+ f"prompt_learning.mipro.modules[{module_idx}].stages must be a list"
1258
+ )
1259
+ else:
1260
+ for stage_idx, stage_entry in enumerate(stages):
1261
+ if isinstance(stage_entry, dict):
1262
+ stage_id = stage_entry.get("stage_id") or stage_entry.get("module_stage_id") or f"stage_{stage_idx}"
1263
+ if stage_id in seen_stage_ids:
1264
+ errors.append(
1265
+ f"Duplicate stage_id '{stage_id}' across modules"
1266
+ )
1267
+ seen_stage_ids.add(stage_id)
1268
+
1269
+ # Validate max_instruction_slots <= max_instruction_sets
1270
+ max_instr_slots = stage_entry.get("max_instruction_slots")
1271
+ if max_instr_slots is not None:
1272
+ try:
1273
+ mis = int(max_instr_slots)
1274
+ if mis < 1:
1275
+ errors.append(
1276
+ f"prompt_learning.mipro.modules[{module_idx}].stages[{stage_idx}].max_instruction_slots must be >= 1"
1277
+ )
1278
+ elif mis > max_instruction_sets:
1279
+ errors.append(
1280
+ f"prompt_learning.mipro.modules[{module_idx}].stages[{stage_idx}].max_instruction_slots ({mis}) "
1281
+ f"exceeds max_instruction_sets ({max_instruction_sets})"
1282
+ )
1283
+ except Exception:
1284
+ errors.append(
1285
+ f"prompt_learning.mipro.modules[{module_idx}].stages[{stage_idx}].max_instruction_slots must be an integer"
1286
+ )
1287
+
1288
+ # Validate max_demo_slots <= max_demo_sets
1289
+ max_demo_slots = stage_entry.get("max_demo_slots")
1290
+ if max_demo_slots is not None:
1291
+ try:
1292
+ mds = int(max_demo_slots)
1293
+ if mds < 0:
1294
+ errors.append(
1295
+ f"prompt_learning.mipro.modules[{module_idx}].stages[{stage_idx}].max_demo_slots must be >= 0"
1296
+ )
1297
+ elif mds > max_demo_sets:
1298
+ errors.append(
1299
+ f"prompt_learning.mipro.modules[{module_idx}].stages[{stage_idx}].max_demo_slots ({mds}) "
1300
+ f"exceeds max_demo_sets ({max_demo_sets})"
1301
+ )
1302
+ except Exception:
1303
+ errors.append(
1304
+ f"prompt_learning.mipro.modules[{module_idx}].stages[{stage_idx}].max_demo_slots must be an integer"
1305
+ )
1306
+
1307
+ # Validate edges reference valid stages
1308
+ edges = module_entry.get("edges")
1309
+ if edges is not None:
1310
+ if not isinstance(edges, list):
1311
+ errors.append(
1312
+ f"prompt_learning.mipro.modules[{module_idx}].edges must be a list"
1313
+ )
1314
+ else:
1315
+ stage_ids_in_module = set()
1316
+ if stages and isinstance(stages, list):
1317
+ for stage_entry in stages:
1318
+ if isinstance(stage_entry, dict):
1319
+ sid = stage_entry.get("stage_id") or stage_entry.get("module_stage_id")
1320
+ if sid:
1321
+ stage_ids_in_module.add(str(sid))
1322
+
1323
+ for edge_idx, edge in enumerate(edges):
1324
+ if isinstance(edge, list | tuple) and len(edge) == 2:
1325
+ source, target = edge
1326
+ elif isinstance(edge, dict):
1327
+ source = edge.get("from") or edge.get("source")
1328
+ target = edge.get("to") or edge.get("target")
1329
+ else:
1330
+ errors.append(
1331
+ f"prompt_learning.mipro.modules[{module_idx}].edges[{edge_idx}] must be a pair or mapping"
1332
+ )
1333
+ continue
1334
+
1335
+ source_str = str(source or "").strip()
1336
+ target_str = str(target or "").strip()
1337
+ if source_str and source_str not in stage_ids_in_module:
1338
+ errors.append(
1339
+ f"prompt_learning.mipro.modules[{module_idx}].edges[{edge_idx}] references unknown source stage '{source_str}'"
1340
+ )
1341
+ if target_str and target_str not in stage_ids_in_module:
1342
+ errors.append(
1343
+ f"prompt_learning.mipro.modules[{module_idx}].edges[{edge_idx}] references unknown target stage '{target_str}'"
1344
+ )
1345
+
1346
+ # CRITICAL: Validate bootstrap_train_seeds and online_pool (can be at top level or under mipro)
1347
+ bootstrap_seeds = pl_section.get("bootstrap_train_seeds") or (mipro_config.get("bootstrap_train_seeds") if isinstance(mipro_config, dict) else None)
1348
+ online_pool = pl_section.get("online_pool") or (mipro_config.get("online_pool") if isinstance(mipro_config, dict) else None)
1349
+
1350
+ if not bootstrap_seeds:
1351
+ errors.append(
1352
+ "Missing required field: prompt_learning.bootstrap_train_seeds\n"
1353
+ " MIPRO requires bootstrap seeds for the few-shot bootstrapping phase.\n"
1354
+ " Example:\n"
1355
+ " [prompt_learning]\n"
1356
+ " bootstrap_train_seeds = [0, 1, 2, 3, 4]"
1357
+ )
1358
+ elif not isinstance(bootstrap_seeds, list):
1359
+ errors.append("prompt_learning.bootstrap_train_seeds must be an array")
1360
+ elif len(bootstrap_seeds) == 0:
1361
+ errors.append("prompt_learning.bootstrap_train_seeds cannot be empty")
1362
+
1363
+ if not online_pool:
1364
+ errors.append(
1365
+ "Missing required field: prompt_learning.online_pool\n"
1366
+ " MIPRO requires online_pool seeds for mini-batch evaluation during optimization.\n"
1367
+ " Example:\n"
1368
+ " [prompt_learning]\n"
1369
+ " online_pool = [5, 6, 7, 8, 9]"
1370
+ )
1371
+ elif not isinstance(online_pool, list):
1372
+ errors.append("prompt_learning.online_pool must be an array")
1373
+ elif len(online_pool) == 0:
1374
+ errors.append("prompt_learning.online_pool cannot be empty")
1375
+
1376
+ # Validate few_shot_score_threshold (if mipro_config exists)
1377
+ if isinstance(mipro_config, dict):
1378
+ threshold = mipro_config.get("few_shot_score_threshold")
1379
+ if threshold is not None:
1380
+ try:
1381
+ f = float(threshold)
1382
+ if not (0.0 <= f <= 1.0):
1383
+ errors.append("prompt_learning.mipro.few_shot_score_threshold must be between 0.0 and 1.0")
1384
+ except Exception:
1385
+ errors.append("prompt_learning.mipro.few_shot_score_threshold must be a number")
1386
+
1387
+ # Validate min_bootstrap_demos (strict bootstrap mode)
1388
+ min_bootstrap_demos = mipro_config.get("min_bootstrap_demos")
1389
+ if min_bootstrap_demos is not None:
1390
+ try:
1391
+ min_demos_int = int(min_bootstrap_demos)
1392
+ if min_demos_int < 0:
1393
+ errors.append("prompt_learning.mipro.min_bootstrap_demos must be >= 0")
1394
+ elif bootstrap_seeds and min_demos_int > len(bootstrap_seeds):
1395
+ errors.append(
1396
+ f"prompt_learning.mipro.min_bootstrap_demos ({min_demos_int}) exceeds "
1397
+ f"bootstrap_train_seeds count ({len(bootstrap_seeds)}). "
1398
+ f"You can never have more demos than bootstrap seeds."
1399
+ )
1400
+ except (TypeError, ValueError):
1401
+ errors.append("prompt_learning.mipro.min_bootstrap_demos must be an integer")
1402
+
1403
+ # Validate reference pool doesn't overlap with bootstrap/online/test pools
1404
+ reference_pool = mipro_config.get("reference_pool") or pl_section.get("reference_pool")
1405
+ if reference_pool:
1406
+ if not isinstance(reference_pool, list):
1407
+ errors.append("prompt_learning.mipro.reference_pool (or prompt_learning.reference_pool) must be an array")
1408
+ else:
1409
+ all_train_test = set(bootstrap_seeds or []) | set(online_pool or []) | set(mipro_config.get("test_pool") or pl_section.get("test_pool") or [])
1410
+ overlapping = set(reference_pool) & all_train_test
1411
+ if overlapping:
1412
+ errors.append(
1413
+ f"reference_pool seeds must not overlap with bootstrap/online/test pools. "
1414
+ f"Found overlapping seeds: {sorted(overlapping)}"
1415
+ )
1416
+
1417
+ # Raise all errors at once for better UX
1418
+ if errors:
1419
+ _raise_validation_errors(errors, config_path)
1420
+
1421
+
1422
def _raise_validation_errors(errors: list[str], config_path: Path) -> None:
    """Assemble all collected validation errors into one message and raise it.

    Raises:
        click.ClickException: always — carries the numbered error report.
    """
    # Build the report as parts and join once, rather than += accumulation.
    parts: list[str] = [
        f"\n❌ Invalid prompt learning config: {config_path}\n\n"
        f"Found {len(errors)} error(s):\n\n"
    ]

    for i, error in enumerate(errors, 1):
        # Continuation lines of a multi-line error are indented so they sit
        # under the numbered prefix.
        indented = "\n   ".join(error.split("\n"))
        parts.append(f"{i}. {indented}\n\n")

    parts.append(
        "📖 See example configs:\n"
        " - cookbooks/dev/blog_posts/gepa/configs/banking77_gepa_local.toml\n"
        " - cookbooks/dev/blog_posts/mipro/configs/banking77_mipro_local.toml\n"
    )

    raise click.ClickException("".join(parts))
1441
+
1442
+
1443
def validate_rl_config(config_data: dict[str, Any], config_path: Path) -> None:
    """
    Validate RL config BEFORE sending to backend.

    Args:
        config_data: Parsed TOML/JSON config
        config_path: Path to config file (for error messages)

    Raises:
        ConfigValidationError: If config is invalid
        click.ClickException: If validation fails (for CLI)
    """
    errors: list[str] = []

    # The RL settings may live under either [rl] or [online_rl]; absence of
    # both is fatal and short-circuits the remaining checks.
    rl_section = config_data.get("rl") or config_data.get("online_rl")
    if not rl_section:
        errors.append("Missing [rl] or [online_rl] section in config")
        _raise_validation_errors(errors, config_path)
        return

    # algorithm is required (e.g. 'grpo', 'ppo').
    if not rl_section.get("algorithm"):
        errors.append(
            "Missing required field: rl.algorithm\n"
            " Must be one of: 'grpo', 'ppo', etc."
        )

    # task_url is required and must be a string when present.
    task_url = rl_section.get("task_url")
    if not task_url:
        errors.append("Missing required field: rl.task_url")
    elif not isinstance(task_url, str):
        errors.append(f"task_url must be a string, got {type(task_url).__name__}")

    if errors:
        _raise_validation_errors(errors, config_path)
1487
+
1488
+
1489
def validate_sft_config(config_data: dict[str, Any], config_path: Path) -> None:
    """
    Validate SFT config BEFORE sending to backend.

    Args:
        config_data: Parsed TOML/JSON config
        config_path: Path to config file (for error messages)

    Raises:
        ConfigValidationError: If config is invalid
        click.ClickException: If validation fails (for CLI)
    """
    # A missing [sft] section is fatal on its own; report it immediately.
    sft_section = config_data.get("sft")
    if not sft_section:
        _raise_validation_errors(["Missing [sft] section in config"], config_path)
        return

    errors: list[str] = []

    # model is the only field required at this pre-flight stage.
    if not sft_section.get("model"):
        errors.append("Missing required field: sft.model")

    if errors:
        _raise_validation_errors(errors, config_path)
1521
+
1522
+
1523
+ def validate_gepa_config_from_file(config_path: Path) -> Tuple[bool, List[str]]:
1524
+ """Validate GEPA config from TOML file with comprehensive checks.
1525
+
1526
+ Returns:
1527
+ (is_valid, errors) tuple where errors is a list of error messages
1528
+ """
1529
+ errors = []
1530
+
1531
+ try:
1532
+ with open(config_path) as f:
1533
+ config_dict = toml.load(f)
1534
+ except Exception as e:
1535
+ return False, [f"Failed to parse TOML: {e}"]
1536
+
1537
+ pl_section = config_dict.get("prompt_learning", {})
1538
+ if not isinstance(pl_section, dict):
1539
+ errors.append("❌ [prompt_learning] section is missing or invalid")
1540
+ return False, errors
1541
+
1542
+ # Check algorithm
1543
+ algorithm = pl_section.get("algorithm")
1544
+ if algorithm != "gepa":
1545
+ errors.append(f"❌ Expected algorithm='gepa', got '{algorithm}'")
1546
+
1547
+ # Check required top-level fields (env_name is now in gepa section)
1548
+ required_top_level = ["task_app_url", "task_app_api_key"]
1549
+ for field in required_top_level:
1550
+ if not pl_section.get(field):
1551
+ errors.append(f"❌ [prompt_learning].{field} is required")
1552
+
1553
+ # Check GEPA section
1554
+ gepa_section = pl_section.get("gepa", {})
1555
+ if not isinstance(gepa_section, dict):
1556
+ errors.append("❌ [prompt_learning.gepa] section is missing or invalid")
1557
+ return False, errors
1558
+
1559
+ # Check env_name in gepa section (required)
1560
+ if not gepa_section.get("env_name"):
1561
+ errors.append("❌ [prompt_learning.gepa].env_name is required")
1562
+
1563
+ # Check required GEPA subsections
1564
+ required_sections = ["evaluation", "rollout", "mutation", "population", "archive", "token"]
1565
+ missing_sections = [s for s in required_sections if not gepa_section.get(s)]
1566
+ if missing_sections:
1567
+ errors.append(
1568
+ f"❌ Missing required GEPA sections: {', '.join(f'[prompt_learning.gepa.{s}]' for s in missing_sections)}"
1569
+ )
1570
+
1571
+ # Validate evaluation section
1572
+ eval_section = gepa_section.get("evaluation", {})
1573
+ if isinstance(eval_section, dict):
1574
+ # Check train_seeds (required, can be in eval section or top-level)
1575
+ train_seeds = (
1576
+ eval_section.get("train_seeds") or
1577
+ eval_section.get("seeds") or
1578
+ pl_section.get("train_seeds")
1579
+ )
1580
+ if not train_seeds:
1581
+ errors.append(
1582
+ "❌ train_seeds is required. "
1583
+ "Must be in [prompt_learning.gepa.evaluation].train_seeds or [prompt_learning].train_seeds"
1584
+ )
1585
+ elif not isinstance(train_seeds, list):
1586
+ errors.append(f"❌ train_seeds must be a list, got {type(train_seeds).__name__}")
1587
+ elif len(train_seeds) == 0:
1588
+ errors.append("❌ train_seeds cannot be empty")
1589
+ elif not all(isinstance(s, int) for s in train_seeds):
1590
+ errors.append("❌ train_seeds must contain only integers")
1591
+
1592
+ # Check val_seeds (required)
1593
+ val_seeds = eval_section.get("val_seeds") or eval_section.get("validation_seeds")
1594
+ if not val_seeds:
1595
+ errors.append(
1596
+ "❌ val_seeds is required in [prompt_learning.gepa.evaluation].val_seeds"
1597
+ )
1598
+ elif not isinstance(val_seeds, list):
1599
+ errors.append(f"❌ val_seeds must be a list, got {type(val_seeds).__name__}")
1600
+ elif len(val_seeds) == 0:
1601
+ errors.append("❌ val_seeds cannot be empty")
1602
+ elif not all(isinstance(s, int) for s in val_seeds):
1603
+ errors.append("❌ val_seeds must contain only integers")
1604
+
1605
+ # Check validation_pool (optional but should be valid if present)
1606
+ validation_pool = eval_section.get("validation_pool")
1607
+ if validation_pool is not None:
1608
+ if not isinstance(validation_pool, str):
1609
+ errors.append(f"❌ validation_pool must be a string, got {type(validation_pool).__name__}")
1610
+ elif validation_pool not in ("train", "test", "val", "validation"):
1611
+ errors.append(
1612
+ f"❌ validation_pool must be one of: train, test, val, validation. Got '{validation_pool}'"
1613
+ )
1614
+
1615
+ # Check validation_top_k (optional but should be valid if present)
1616
+ validation_top_k = eval_section.get("validation_top_k")
1617
+ if validation_top_k is not None:
1618
+ if not isinstance(validation_top_k, int):
1619
+ errors.append(f"❌ validation_top_k must be an integer, got {type(validation_top_k).__name__}")
1620
+ elif validation_top_k <= 0:
1621
+ errors.append(f"❌ validation_top_k must be > 0, got {validation_top_k}")
1622
+
1623
+ # Validate rollout section
1624
+ rollout_section = gepa_section.get("rollout", {})
1625
+ if isinstance(rollout_section, dict):
1626
+ budget = rollout_section.get("budget")
1627
+ if budget is None:
1628
+ errors.append("❌ [prompt_learning.gepa.rollout].budget is required")
1629
+ elif not isinstance(budget, int):
1630
+ errors.append(f"❌ rollout.budget must be an integer, got {type(budget).__name__}")
1631
+ elif budget <= 0:
1632
+ errors.append(f"❌ rollout.budget must be > 0, got {budget}")
1633
+
1634
+ max_concurrent = rollout_section.get("max_concurrent")
1635
+ if max_concurrent is not None:
1636
+ if not isinstance(max_concurrent, int):
1637
+ errors.append(f"❌ rollout.max_concurrent must be an integer, got {type(max_concurrent).__name__}")
1638
+ elif max_concurrent <= 0:
1639
+ errors.append(f"❌ rollout.max_concurrent must be > 0, got {max_concurrent}")
1640
+
1641
+ # Validate mutation section
1642
+ mutation_section = gepa_section.get("mutation", {})
1643
+ if isinstance(mutation_section, dict):
1644
+ required_mutation_fields = ["llm_model", "llm_provider"]
1645
+ for field in required_mutation_fields:
1646
+ if not mutation_section.get(field):
1647
+ errors.append(f"❌ [prompt_learning.gepa.mutation].{field} is required")
1648
+
1649
+ rate = mutation_section.get("rate")
1650
+ if rate is not None:
1651
+ if not isinstance(rate, int | float):
1652
+ errors.append(f"❌ mutation.rate must be a number, got {type(rate).__name__}")
1653
+ elif not (0.0 <= rate <= 1.0):
1654
+ errors.append(f"❌ mutation.rate must be between 0.0 and 1.0, got {rate}")
1655
+
1656
+ # Validate population section
1657
+ population_section = gepa_section.get("population", {})
1658
+ if isinstance(population_section, dict):
1659
+ initial_size = population_section.get("initial_size")
1660
+ if initial_size is not None:
1661
+ if not isinstance(initial_size, int):
1662
+ errors.append(f"❌ population.initial_size must be an integer, got {type(initial_size).__name__}")
1663
+ elif initial_size <= 0:
1664
+ errors.append(f"❌ population.initial_size must be > 0, got {initial_size}")
1665
+
1666
+ num_generations = population_section.get("num_generations")
1667
+ if num_generations is not None:
1668
+ if not isinstance(num_generations, int):
1669
+ errors.append(f"❌ population.num_generations must be an integer, got {type(num_generations).__name__}")
1670
+ elif num_generations <= 0:
1671
+ errors.append(f"❌ population.num_generations must be > 0, got {num_generations}")
1672
+
1673
+ # Validate archive section
1674
+ archive_section = gepa_section.get("archive", {})
1675
+ if isinstance(archive_section, dict):
1676
+ max_size = archive_section.get("max_size")
1677
+ if max_size is not None:
1678
+ if not isinstance(max_size, int):
1679
+ errors.append(f"❌ archive.max_size must be an integer, got {type(max_size).__name__}")
1680
+ elif max_size < 0:
1681
+ errors.append(f"❌ archive.max_size must be >= 0, got {max_size}")
1682
+
1683
+ # Validate token section
1684
+ token_section = gepa_section.get("token", {})
1685
+ if isinstance(token_section, dict):
1686
+ max_limit = token_section.get("max_limit")
1687
+ if max_limit is not None:
1688
+ if not isinstance(max_limit, int):
1689
+ errors.append(f"❌ token.max_limit must be an integer, got {type(max_limit).__name__}")
1690
+ elif max_limit <= 0:
1691
+ errors.append(f"❌ token.max_limit must be > 0, got {max_limit}")
1692
+
1693
+ # Check initial_prompt section
1694
+ initial_prompt = pl_section.get("initial_prompt", {})
1695
+ if not isinstance(initial_prompt, dict):
1696
+ errors.append("❌ [prompt_learning.initial_prompt] section is missing or invalid")
1697
+ else:
1698
+ if not initial_prompt.get("id"):
1699
+ errors.append("❌ [prompt_learning.initial_prompt].id is required")
1700
+ if not initial_prompt.get("messages"):
1701
+ errors.append("❌ [prompt_learning.initial_prompt].messages is required (must be a list)")
1702
+ elif not isinstance(initial_prompt.get("messages"), list):
1703
+ errors.append("❌ [prompt_learning.initial_prompt].messages must be a list")
1704
+ elif len(initial_prompt.get("messages", [])) == 0:
1705
+ errors.append("❌ [prompt_learning.initial_prompt].messages cannot be empty")
1706
+
1707
+ # Check policy section
1708
+ policy_section = pl_section.get("policy", {})
1709
+ if not isinstance(policy_section, dict):
1710
+ errors.append("❌ [prompt_learning.policy] section is missing or invalid")
1711
+ else:
1712
+ # Validate policy section - reject inference_url (backend requirement)
1713
+ if "inference_url" in policy_section:
1714
+ errors.append(
1715
+ "❌ inference_url must not be specified in [prompt_learning.policy]. "
1716
+ "The trainer provides the inference URL in rollout requests. "
1717
+ "Remove inference_url from your config file."
1718
+ )
1719
+ if "api_base" in policy_section:
1720
+ errors.append(
1721
+ "❌ api_base must not be specified in [prompt_learning.policy]. "
1722
+ "The trainer provides the inference URL in rollout requests. "
1723
+ "Remove api_base from your config file."
1724
+ )
1725
+ if "base_url" in policy_section:
1726
+ errors.append(
1727
+ "❌ base_url must not be specified in [prompt_learning.policy]. "
1728
+ "The trainer provides the inference URL in rollout requests. "
1729
+ "Remove base_url from your config file."
1730
+ )
1731
+
1732
+ if not policy_section.get("model"):
1733
+ errors.append("❌ [prompt_learning.policy].model is required")
1734
+ if not policy_section.get("provider"):
1735
+ errors.append("❌ [prompt_learning.policy].provider is required")
1736
+
1737
+ # Validate proxy_models section (can be at top-level or gepa-specific)
1738
+ proxy_models_section = pl_section.get("proxy_models") or gepa_section.get("proxy_models")
1739
+ if proxy_models_section:
1740
+ if not isinstance(proxy_models_section, dict):
1741
+ errors.append("❌ proxy_models must be a table/dict when provided")
1742
+ else:
1743
+ required_fields = ["hi_provider", "hi_model", "lo_provider", "lo_model"]
1744
+ for field in required_fields:
1745
+ if not proxy_models_section.get(field):
1746
+ errors.append(f"❌ proxy_models.{field} is required")
1747
+ # Validate numeric fields
1748
+ for field, min_val in [("n_min_hi", 0), ("r2_thresh", 0.0), ("r2_stop", 0.0), ("sigma_max", 0.0), ("sigma_stop", 0.0), ("verify_every", 0)]:
1749
+ val = proxy_models_section.get(field)
1750
+ if val is not None:
1751
+ try:
1752
+ if field in ("r2_thresh", "r2_stop"):
1753
+ fval = float(val)
1754
+ if not (0.0 <= fval <= 1.0):
1755
+ errors.append(f"❌ proxy_models.{field} must be between 0.0 and 1.0, got {fval}")
1756
+ elif field.startswith("sigma"):
1757
+ fval = float(val)
1758
+ if fval < min_val:
1759
+ errors.append(f"❌ proxy_models.{field} must be >= {min_val}, got {fval}")
1760
+ else:
1761
+ ival = int(val)
1762
+ if ival < min_val:
1763
+ errors.append(f"❌ proxy_models.{field} must be >= {min_val}, got {ival}")
1764
+ except (TypeError, ValueError):
1765
+ errors.append(f"❌ proxy_models.{field} must be numeric, got {type(val).__name__}")
1766
+ # Validate provider/model combinations
1767
+ if proxy_models_section.get("hi_provider") and proxy_models_section.get("hi_model"):
1768
+ hi_errors = _validate_model_for_provider(
1769
+ proxy_models_section["hi_model"],
1770
+ proxy_models_section["hi_provider"],
1771
+ "proxy_models.hi_model",
1772
+ allow_nano=True,
1773
+ )
1774
+ errors.extend(hi_errors)
1775
+ if proxy_models_section.get("lo_provider") and proxy_models_section.get("lo_model"):
1776
+ lo_errors = _validate_model_for_provider(
1777
+ proxy_models_section["lo_model"],
1778
+ proxy_models_section["lo_provider"],
1779
+ "proxy_models.lo_model",
1780
+ allow_nano=True,
1781
+ )
1782
+ errors.extend(lo_errors)
1783
+
1784
+ # Validate adaptive_pool section (GEPA-specific)
1785
+ adaptive_pool_section = gepa_section.get("adaptive_pool")
1786
+ if adaptive_pool_section:
1787
+ _validate_adaptive_pool_config(adaptive_pool_section, "gepa.adaptive_pool", errors)
1788
+
1789
+ # Validate adaptive_batch section (GEPA-specific)
1790
+ adaptive_batch_section = gepa_section.get("adaptive_batch")
1791
+ if adaptive_batch_section:
1792
+ if not isinstance(adaptive_batch_section, dict):
1793
+ errors.append("❌ gepa.adaptive_batch must be a table/dict when provided")
1794
+ else:
1795
+ level = adaptive_batch_section.get("level")
1796
+ if level is not None:
1797
+ valid_levels = {"NONE", "LOW", "MODERATE", "HIGH"}
1798
+ if str(level).upper() not in valid_levels:
1799
+ errors.append(
1800
+ f"❌ gepa.adaptive_batch.level must be one of {valid_levels}, got '{level}'"
1801
+ )
1802
+ # Validate numeric fields
1803
+ for field, min_val in [
1804
+ ("reflection_minibatch_size", 1),
1805
+ ("val_subsample_size", 1),
1806
+ ]:
1807
+ val = adaptive_batch_section.get(field)
1808
+ if val is not None:
1809
+ try:
1810
+ ival = int(val)
1811
+ if ival < min_val:
1812
+ errors.append(f"❌ gepa.adaptive_batch.{field} must be >= {min_val}, got {ival}")
1813
+ except (TypeError, ValueError):
1814
+ errors.append(f"❌ gepa.adaptive_batch.{field} must be an integer, got {type(val).__name__}")
1815
+ # Validate min_local_improvement
1816
+ min_improvement = adaptive_batch_section.get("min_local_improvement")
1817
+ if min_improvement is not None:
1818
+ try:
1819
+ float(min_improvement) # Just validate it's numeric
1820
+ except (TypeError, ValueError):
1821
+ errors.append(
1822
+ f"❌ gepa.adaptive_batch.min_local_improvement must be numeric, got {type(min_improvement).__name__}"
1823
+ )
1824
+ # Validate val_evaluation_mode
1825
+ val_mode = adaptive_batch_section.get("val_evaluation_mode")
1826
+ if val_mode is not None and val_mode not in ("full", "subsample"):
1827
+ errors.append(
1828
+ f"❌ gepa.adaptive_batch.val_evaluation_mode must be 'full' or 'subsample', got '{val_mode}'"
1829
+ )
1830
+ # Validate candidate_selection_strategy
1831
+ selection_strategy = adaptive_batch_section.get("candidate_selection_strategy")
1832
+ if selection_strategy is not None and selection_strategy not in ("coverage", "random"):
1833
+ errors.append(
1834
+ f"❌ gepa.adaptive_batch.candidate_selection_strategy must be 'coverage' or 'random', got '{selection_strategy}'"
1835
+ )
1836
+ # Validate val_evaluation_mode="subsample" requires val_subsample_size > 0
1837
+ val_mode = adaptive_batch_section.get("val_evaluation_mode")
1838
+ if val_mode == "subsample":
1839
+ subsample_size = adaptive_batch_section.get("val_subsample_size")
1840
+ if subsample_size is None:
1841
+ errors.append(
1842
+ "❌ gepa.adaptive_batch.val_evaluation_mode='subsample' requires val_subsample_size to be set"
1843
+ )
1844
+ elif isinstance(subsample_size, int | float) and subsample_size <= 0:
1845
+ errors.append(
1846
+ f"❌ gepa.adaptive_batch.val_subsample_size must be > 0 when val_evaluation_mode='subsample', got {subsample_size}"
1847
+ )
1848
+
1849
+ return len(errors) == 0, errors
1850
+
1851
+
1852
def _validate_mipro_modules(mipro_section: dict, errors: List[str]) -> None:
    """Validate the optional mipro.modules pipeline topology (stages, slots, per-stage policy, edges) in place."""
    modules_config = mipro_section.get("modules")
    if modules_config is None:
        return
    if not isinstance(modules_config, list):
        errors.append(f"❌ mipro.modules must be a list, got {type(modules_config).__name__}")
        return

    # Slot limits default to the backend maximums when not configured.
    max_instruction_sets = mipro_section.get("max_instruction_sets", 128)
    max_demo_sets = mipro_section.get("max_demo_sets", 128)
    seen_module_ids = set()
    seen_stage_ids = set()  # stage ids must be unique across ALL modules

    for module_idx, module_entry in enumerate(modules_config):
        if not isinstance(module_entry, dict):
            errors.append(
                f"❌ mipro.modules[{module_idx}] must be a table/dict, got {type(module_entry).__name__}"
            )
            continue

        module_id = module_entry.get("module_id") or module_entry.get("id") or f"module_{module_idx}"
        if module_id in seen_module_ids:
            errors.append(
                f"❌ Duplicate module_id '{module_id}' in mipro.modules"
            )
        seen_module_ids.add(module_id)

        # Validate stages
        stages = module_entry.get("stages")
        if stages is not None:
            if not isinstance(stages, list):
                errors.append(
                    f"❌ mipro.modules[{module_idx}].stages must be a list, got {type(stages).__name__}"
                )
            else:
                for stage_idx, stage_entry in enumerate(stages):
                    if isinstance(stage_entry, dict):
                        stage_id = stage_entry.get("stage_id") or stage_entry.get("module_stage_id") or f"stage_{stage_idx}"
                        if stage_id in seen_stage_ids:
                            errors.append(
                                f"❌ Duplicate stage_id '{stage_id}' across modules"
                            )
                        seen_stage_ids.add(stage_id)

                        # Validate max_instruction_slots <= max_instruction_sets
                        max_instr_slots = stage_entry.get("max_instruction_slots")
                        if max_instr_slots is not None:
                            try:
                                mis = int(max_instr_slots)
                                if mis < 1:
                                    errors.append(
                                        f"❌ mipro.modules[{module_idx}].stages[{stage_idx}].max_instruction_slots must be >= 1, got {mis}"
                                    )
                                elif mis > max_instruction_sets:
                                    errors.append(
                                        f"❌ mipro.modules[{module_idx}].stages[{stage_idx}].max_instruction_slots ({mis}) "
                                        f"exceeds max_instruction_sets ({max_instruction_sets})"
                                    )
                            except Exception:
                                errors.append(
                                    f"❌ mipro.modules[{module_idx}].stages[{stage_idx}].max_instruction_slots must be an integer"
                                )

                        # Validate max_demo_slots <= max_demo_sets
                        max_demo_slots = stage_entry.get("max_demo_slots")
                        if max_demo_slots is not None:
                            try:
                                mds = int(max_demo_slots)
                                if mds < 0:
                                    errors.append(
                                        f"❌ mipro.modules[{module_idx}].stages[{stage_idx}].max_demo_slots must be >= 0, got {mds}"
                                    )
                                elif mds > max_demo_sets:
                                    errors.append(
                                        f"❌ mipro.modules[{module_idx}].stages[{stage_idx}].max_demo_slots ({mds}) "
                                        f"exceeds max_demo_sets ({max_demo_sets})"
                                    )
                            except Exception:
                                errors.append(
                                    f"❌ mipro.modules[{module_idx}].stages[{stage_idx}].max_demo_slots must be an integer"
                                )

                        # Validate per-stage policy config (REQUIRED)
                        stage_policy = stage_entry.get("policy")
                        if stage_policy is None:
                            errors.append(
                                f"❌ mipro.modules[{module_idx}].stages[{stage_idx}]: [policy] table is REQUIRED. "
                                f"Each stage must have its own policy configuration with 'model' and 'provider' fields."
                            )
                        elif not isinstance(stage_policy, dict):
                            errors.append(
                                f"❌ mipro.modules[{module_idx}].stages[{stage_idx}]: [policy] must be a table/dict, got {type(stage_policy).__name__}"
                            )
                        else:
                            if not stage_policy.get("model"):
                                errors.append(
                                    f"❌ mipro.modules[{module_idx}].stages[{stage_idx}]: [policy].model is required"
                                )
                            if not stage_policy.get("provider"):
                                errors.append(
                                    f"❌ mipro.modules[{module_idx}].stages[{stage_idx}]: [policy].provider is required"
                                )
                            # Validate model/provider combination
                            stage_model = stage_policy.get("model")
                            stage_provider = stage_policy.get("provider")
                            if stage_model and stage_provider:
                                errors.extend(_validate_model_for_provider(
                                    stage_model, stage_provider,
                                    f"prompt_learning.mipro.modules[{module_idx}].stages[{stage_idx}].policy.model",
                                    allow_nano=True,  # Policy models can be nano
                                ))
                            # Reject inference_url overrides in stage policy (trainer provides it)
                            if "inference_url" in stage_policy:
                                errors.append(
                                    f"❌ mipro.modules[{module_idx}].stages[{stage_idx}]: [policy].inference_url must not be specified. "
                                    f"The trainer provides the inference URL in rollout requests. Remove inference_url from stage policy."
                                )
                            if "api_base" in stage_policy:
                                errors.append(
                                    f"❌ mipro.modules[{module_idx}].stages[{stage_idx}]: [policy].api_base must not be specified. "
                                    f"Remove api_base from stage policy."
                                )
                            if "base_url" in stage_policy:
                                errors.append(
                                    f"❌ mipro.modules[{module_idx}].stages[{stage_idx}]: [policy].base_url must not be specified. "
                                    f"Remove base_url from stage policy."
                                )

        # Validate edges reference valid stages within this module
        edges = module_entry.get("edges")
        if edges is not None:
            if not isinstance(edges, list):
                errors.append(
                    f"❌ mipro.modules[{module_idx}].edges must be a list, got {type(edges).__name__}"
                )
            else:
                stage_ids_in_module = set()
                if stages and isinstance(stages, list):
                    for stage_entry in stages:
                        if isinstance(stage_entry, dict):
                            sid = stage_entry.get("stage_id") or stage_entry.get("module_stage_id")
                            if sid:
                                stage_ids_in_module.add(str(sid))

                for edge_idx, edge in enumerate(edges):
                    # Edges may be [source, target] pairs or {from/source, to/target} mappings.
                    if isinstance(edge, list | tuple) and len(edge) == 2:
                        source, target = edge
                    elif isinstance(edge, dict):
                        source = edge.get("from") or edge.get("source")
                        target = edge.get("to") or edge.get("target")
                    else:
                        errors.append(
                            f"❌ mipro.modules[{module_idx}].edges[{edge_idx}] must be a pair or mapping"
                        )
                        continue

                    source_str = str(source or "").strip()
                    target_str = str(target or "").strip()
                    if source_str and source_str not in stage_ids_in_module:
                        errors.append(
                            f"❌ mipro.modules[{module_idx}].edges[{edge_idx}] references unknown source stage '{source_str}'"
                        )
                    if target_str and target_str not in stage_ids_in_module:
                        errors.append(
                            f"❌ mipro.modules[{module_idx}].edges[{edge_idx}] references unknown target stage '{target_str}'"
                        )


def validate_mipro_config_from_file(config_path: Path) -> Tuple[bool, List[str]]:
    """Validate MIPRO config from TOML file with comprehensive checks.

    Checks the [prompt_learning] table for required top-level fields, seed
    pools (bootstrap/online/reference and their disjointness), the
    [prompt_learning.mipro] numeric knobs, meta-model settings, module/stage
    topology, proxy models, and the adaptive pool section.

    Args:
        config_path: Path to the TOML config file.

    Returns:
        (is_valid, errors) tuple where errors is a list of error messages
    """
    errors: List[str] = []

    try:
        with open(config_path) as f:
            config_dict = toml.load(f)
    except Exception as e:
        # Parse failure is fatal; nothing else can be validated.
        return False, [f"Failed to parse TOML: {e}"]

    pl_section = config_dict.get("prompt_learning", {})
    if not isinstance(pl_section, dict):
        errors.append("❌ [prompt_learning] section is missing or invalid")
        return False, errors

    # Check algorithm
    algorithm = pl_section.get("algorithm")
    if algorithm != "mipro":
        errors.append(f"❌ Expected algorithm='mipro', got '{algorithm}'")

    # Check required top-level fields
    required_top_level = ["task_app_url", "task_app_api_key"]
    for field in required_top_level:
        if not pl_section.get(field):
            errors.append(f"❌ [prompt_learning].{field} is required")

    # Check env_name (required - can be at top level or in mipro section)
    env_name = pl_section.get("env_name") or pl_section.get("task_app_id")
    mipro_section = pl_section.get("mipro", {})
    if isinstance(mipro_section, dict):
        env_name = env_name or mipro_section.get("env_name")
    if not env_name:
        errors.append(
            "❌ env_name is required. "
            "Must be in [prompt_learning].env_name, [prompt_learning].task_app_id, or [prompt_learning.mipro].env_name"
        )

    # Check MIPRO section
    if not isinstance(mipro_section, dict):
        errors.append("❌ [prompt_learning.mipro] section is missing or invalid")
        return False, errors

    # Validate policy section - reject inference_url/api_base/base_url
    # (the trainer provides the inference URL; user overrides are an error)
    policy_section = pl_section.get("policy", {})
    if isinstance(policy_section, dict):
        for key in ("inference_url", "api_base", "base_url"):
            if key in policy_section:
                errors.append(
                    f"❌ {key} must not be specified in [prompt_learning.policy]. "
                    "The trainer provides the inference URL in rollout requests. "
                    f"Remove {key} from your config file."
                )

    # CRITICAL: Validate bootstrap_train_seeds and online_pool (can be at top level or under mipro)
    bootstrap_seeds = (
        mipro_section.get("bootstrap_train_seeds")
        or pl_section.get("bootstrap_train_seeds")
    )
    if not bootstrap_seeds:
        errors.append(
            "❌ bootstrap_train_seeds is required. "
            "Must be in [prompt_learning].bootstrap_train_seeds or [prompt_learning.mipro].bootstrap_train_seeds"
        )
    elif not isinstance(bootstrap_seeds, list):
        errors.append(f"❌ bootstrap_train_seeds must be a list, got {type(bootstrap_seeds).__name__}")
    elif len(bootstrap_seeds) == 0:
        errors.append("❌ bootstrap_train_seeds cannot be empty")
    elif not all(isinstance(s, int) for s in bootstrap_seeds):
        errors.append("❌ bootstrap_train_seeds must contain only integers")

    online_pool = (
        mipro_section.get("online_pool")
        or pl_section.get("online_pool")
    )
    if not online_pool:
        errors.append(
            "❌ online_pool is required. "
            "Must be in [prompt_learning].online_pool or [prompt_learning.mipro].online_pool"
        )
    elif not isinstance(online_pool, list):
        errors.append(f"❌ online_pool must be a list, got {type(online_pool).__name__}")
    elif len(online_pool) == 0:
        errors.append("❌ online_pool cannot be empty")
    elif not all(isinstance(s, int) for s in online_pool):
        errors.append("❌ online_pool must contain only integers")

    # CRITICAL: Validate reference_pool is required (backend requires it)
    reference_pool = (
        mipro_section.get("reference_pool")
        or pl_section.get("reference_pool")
    )
    if not reference_pool:
        errors.append(
            "❌ reference_pool is required for MIPRO. "
            "reference_pool seeds are used to build the reference corpus for meta-prompt context. "
            "Add reference_pool at [prompt_learning] or [prompt_learning.mipro] level. "
            "Example: reference_pool = [30, 31, 32, 33, 34]"
        )
    elif not isinstance(reference_pool, list):
        errors.append(f"❌ reference_pool must be a list, got {type(reference_pool).__name__}")
    elif len(reference_pool) == 0:
        errors.append("❌ reference_pool cannot be empty")
    elif not all(isinstance(s, int) for s in reference_pool):
        errors.append("❌ reference_pool must contain only integers")
    else:
        # Validate reference pool doesn't overlap with bootstrap/online/test pools.
        test_pool = (
            mipro_section.get("test_pool")
            or pl_section.get("test_pool")
            or []
        )
        # FIX: guard against non-list pools (already reported above) so the
        # set() construction cannot raise TypeError on a scalar value.
        all_train_test = (
            set(bootstrap_seeds if isinstance(bootstrap_seeds, list) else [])
            | set(online_pool if isinstance(online_pool, list) else [])
            | set(test_pool if isinstance(test_pool, list) else [])
        )
        overlapping = set(reference_pool) & all_train_test
        if overlapping:
            errors.append(
                f"❌ reference_pool seeds must not overlap with bootstrap/online/test pools. "
                f"Found overlapping seeds: {sorted(overlapping)}"
            )

    # Validate required numeric fields
    required_numeric_fields = [
        "num_iterations",
        "num_evaluations_per_iteration",
        "batch_size",
        "max_concurrent",
    ]
    for field in required_numeric_fields:
        val = mipro_section.get(field)
        if val is None:
            errors.append(f"❌ [prompt_learning.mipro].{field} is required")
        elif not isinstance(val, int):
            errors.append(f"❌ mipro.{field} must be an integer, got {type(val).__name__}")
        elif val <= 0:
            errors.append(f"❌ mipro.{field} must be > 0, got {val}")

    # Validate optional numeric fields (second tuple element: must be > 0)
    optional_numeric_fields = [
        ("max_demo_set_size", True),
        ("max_demo_sets", True),
        ("max_instruction_sets", True),
        ("full_eval_every_k", True),
        ("instructions_per_batch", True),
        ("max_instructions", True),
        ("duplicate_retry_limit", True),
    ]
    for field, must_be_positive in optional_numeric_fields:
        val = mipro_section.get(field)
        if val is not None:
            if not isinstance(val, int):
                errors.append(f"❌ mipro.{field} must be an integer, got {type(val).__name__}")
            elif must_be_positive and val <= 0:
                errors.append(f"❌ mipro.{field} must be > 0, got {val}")
            elif not must_be_positive and val < 0:
                errors.append(f"❌ mipro.{field} must be >= 0, got {val}")

    # Validate meta_model if set (optional - backend applies defaults)
    meta_model = mipro_section.get("meta_model")
    # FIX: coerce to str before strip() so a non-string TOML value is
    # reported as a validation error instead of crashing with AttributeError.
    meta_model_provider = str(mipro_section.get("meta_model_provider") or "").strip()
    if meta_model:
        # If meta_model is explicitly set, validate it
        if not meta_model_provider:
            errors.append(
                "❌ [prompt_learning.mipro].meta_model_provider is required when meta_model is set"
            )
        else:
            errors.extend(_validate_model_for_provider(
                meta_model, meta_model_provider, "prompt_learning.mipro.meta_model", allow_nano=False
            ))
    # If meta_model is not set, backend will use defaults (llama-3.3-70b-versatile/groq)

    # Validate meta model temperature
    meta_temperature = mipro_section.get("meta_model_temperature")
    if meta_temperature is not None:
        if not isinstance(meta_temperature, int | float):
            errors.append(f"❌ mipro.meta_model_temperature must be numeric, got {type(meta_temperature).__name__}")
        else:
            temp = float(meta_temperature)
            if temp < 0.0:
                errors.append(f"❌ mipro.meta_model_temperature must be >= 0.0, got {temp}")

    # Validate meta model max_tokens.
    # FIX: this was previously checked twice with inconsistent rules (one
    # check missed the > 0 constraint and bad values produced duplicate
    # errors); consolidated into a single integer + positivity check.
    meta_max_tokens = mipro_section.get("meta_model_max_tokens")
    if meta_max_tokens is not None:
        if not isinstance(meta_max_tokens, int):
            errors.append(f"❌ mipro.meta_model_max_tokens must be an integer, got {type(meta_max_tokens).__name__}")
        elif meta_max_tokens <= 0:
            errors.append(f"❌ mipro.meta_model_max_tokens must be > 0, got {meta_max_tokens}")

    # Validate proposer_effort (can be in instructions section or top-level mipro section)
    instructions_section = mipro_section.get("instructions", {})
    if not isinstance(instructions_section, dict):
        instructions_section = {}
    proposer_effort = str(
        instructions_section.get("proposer_effort")
        or mipro_section.get("proposer_effort")
        or "LOW"
    ).upper()
    valid_effort_levels = {"LOW_CONTEXT", "LOW", "MEDIUM", "HIGH"}
    if proposer_effort not in valid_effort_levels:
        errors.append(
            f"❌ Invalid proposer_effort: '{proposer_effort}'\n"
            f" Must be one of: {', '.join(sorted(valid_effort_levels))}\n"
            f" Got: '{proposer_effort}'"
        )

    # Validate proposer_output_tokens (can be in instructions section or top-level mipro section)
    proposer_output_tokens = str(
        instructions_section.get("proposer_output_tokens")
        or mipro_section.get("proposer_output_tokens")
        or "FAST"
    ).upper()
    valid_output_tokens = {"RAPID", "FAST", "SLOW"}
    if proposer_output_tokens not in valid_output_tokens:
        errors.append(
            f"❌ Invalid proposer_output_tokens: '{proposer_output_tokens}'\n"
            f" Must be one of: {', '.join(sorted(valid_output_tokens))}\n"
            f" Got: '{proposer_output_tokens}'"
        )

    # Note: RAPID can now be used with any proposer_effort level (5000 tokens)

    # Validate generate_at_iterations
    generate_at_iterations = mipro_section.get("generate_at_iterations")
    if generate_at_iterations is not None:
        if not isinstance(generate_at_iterations, list):
            errors.append(f"❌ mipro.generate_at_iterations must be a list, got {type(generate_at_iterations).__name__}")
        else:
            for idx, iter_val in enumerate(generate_at_iterations):
                try:
                    iter_int = int(iter_val)
                    if iter_int < 0:
                        errors.append(
                            f"❌ mipro.generate_at_iterations[{idx}] must be >= 0, got {iter_int}"
                        )
                except Exception:
                    errors.append(
                        f"❌ mipro.generate_at_iterations[{idx}] must be an integer, got {iter_val!r}"
                    )

    # Validate spec configuration (spec knobs only matter when spec_path is set)
    spec_path = mipro_section.get("spec_path")
    if spec_path:
        # Validate spec_max_tokens if provided
        spec_max_tokens = mipro_section.get("spec_max_tokens")
        if spec_max_tokens is not None:
            if not isinstance(spec_max_tokens, int):
                errors.append(f"❌ mipro.spec_max_tokens must be an integer, got {type(spec_max_tokens).__name__}")
            elif spec_max_tokens <= 0:
                errors.append(f"❌ mipro.spec_max_tokens must be > 0, got {spec_max_tokens}")

        # Validate spec_priority_threshold if provided
        spec_priority_threshold = mipro_section.get("spec_priority_threshold")
        if spec_priority_threshold is not None:
            if not isinstance(spec_priority_threshold, int):
                errors.append(f"❌ mipro.spec_priority_threshold must be an integer, got {type(spec_priority_threshold).__name__}")
            elif spec_priority_threshold < 0:
                errors.append(f"❌ mipro.spec_priority_threshold must be >= 0, got {spec_priority_threshold}")

    # Validate few_shot_score_threshold (must be a fraction in [0, 1])
    few_shot_score_threshold = mipro_section.get("few_shot_score_threshold")
    if few_shot_score_threshold is not None:
        if not isinstance(few_shot_score_threshold, int | float):
            errors.append(f"❌ mipro.few_shot_score_threshold must be numeric, got {type(few_shot_score_threshold).__name__}")
        else:
            threshold = float(few_shot_score_threshold)
            if not (0.0 <= threshold <= 1.0):
                errors.append(f"❌ mipro.few_shot_score_threshold must be between 0.0 and 1.0, got {threshold}")

    # Validate modules/stages configuration (extracted helper)
    _validate_mipro_modules(mipro_section, errors)

    # Check initial_prompt section
    initial_prompt = pl_section.get("initial_prompt", {})
    if not isinstance(initial_prompt, dict):
        errors.append("❌ [prompt_learning.initial_prompt] section is missing or invalid")
    else:
        if not initial_prompt.get("id"):
            errors.append("❌ [prompt_learning.initial_prompt].id is required")
        if not initial_prompt.get("messages"):
            errors.append("❌ [prompt_learning.initial_prompt].messages is required (must be a list)")
        elif not isinstance(initial_prompt.get("messages"), list):
            errors.append("❌ [prompt_learning.initial_prompt].messages must be a list")
        elif len(initial_prompt.get("messages", [])) == 0:
            errors.append("❌ [prompt_learning.initial_prompt].messages cannot be empty")

    # Check policy section
    if not isinstance(policy_section, dict):
        errors.append("❌ [prompt_learning.policy] section is missing or invalid")
    else:
        if not policy_section.get("model"):
            errors.append("❌ [prompt_learning.policy].model is required")
        if not policy_section.get("provider"):
            errors.append("❌ [prompt_learning.policy].provider is required")

    # Validate proxy_models section (can be at top-level or mipro-specific)
    proxy_models_section = pl_section.get("proxy_models") or mipro_section.get("proxy_models")
    if proxy_models_section:
        if not isinstance(proxy_models_section, dict):
            errors.append("❌ proxy_models must be a table/dict when provided")
        else:
            required_fields = ["hi_provider", "hi_model", "lo_provider", "lo_model"]
            for field in required_fields:
                if not proxy_models_section.get(field):
                    errors.append(f"❌ proxy_models.{field} is required")
            # Validate numeric fields (same as GEPA): r2_* are fractions,
            # sigma_* are non-negative floats, the rest non-negative ints.
            for field, min_val in [("n_min_hi", 0), ("r2_thresh", 0.0), ("r2_stop", 0.0), ("sigma_max", 0.0), ("sigma_stop", 0.0), ("verify_every", 0)]:
                val = proxy_models_section.get(field)
                if val is not None:
                    try:
                        if field in ("r2_thresh", "r2_stop"):
                            fval = float(val)
                            if not (0.0 <= fval <= 1.0):
                                errors.append(f"❌ proxy_models.{field} must be between 0.0 and 1.0, got {fval}")
                        elif field.startswith("sigma"):
                            fval = float(val)
                            if fval < min_val:
                                errors.append(f"❌ proxy_models.{field} must be >= {min_val}, got {fval}")
                        else:
                            ival = int(val)
                            if ival < min_val:
                                errors.append(f"❌ proxy_models.{field} must be >= {min_val}, got {ival}")
                    except (TypeError, ValueError):
                        errors.append(f"❌ proxy_models.{field} must be numeric, got {type(val).__name__}")
            # Validate provider/model combinations
            if proxy_models_section.get("hi_provider") and proxy_models_section.get("hi_model"):
                hi_errors = _validate_model_for_provider(
                    proxy_models_section["hi_model"],
                    proxy_models_section["hi_provider"],
                    "proxy_models.hi_model",
                    allow_nano=True,
                )
                errors.extend(hi_errors)
            if proxy_models_section.get("lo_provider") and proxy_models_section.get("lo_model"):
                lo_errors = _validate_model_for_provider(
                    proxy_models_section["lo_model"],
                    proxy_models_section["lo_provider"],
                    "proxy_models.lo_model",
                    allow_nano=True,
                )
                errors.extend(lo_errors)

    # Validate adaptive_pool section (MIPRO-specific, can be nested or flat)
    adaptive_pool_section = mipro_section.get("adaptive_pool")
    if adaptive_pool_section:
        _validate_adaptive_pool_config(adaptive_pool_section, "mipro.adaptive_pool", errors)

    return len(errors) == 0, errors
+
2385
+
2386
def validate_prompt_learning_config_from_file(config_path: Path, algorithm: str) -> None:
    """Validate a prompt-learning TOML config file, raising on failure.

    Args:
        config_path: Path to TOML config file
        algorithm: Either 'gepa' or 'mipro'

    Raises:
        ConfigValidationError: If validation fails, with detailed error messages
        ValueError: If *algorithm* is not a recognized name
    """
    ctx: dict[str, Any] = {"config_path": str(config_path), "algorithm": algorithm}
    log_info("validate_prompt_learning_config_from_file invoked", ctx=ctx)

    # Dispatch to the algorithm-specific validator.
    validators = {
        "gepa": validate_gepa_config_from_file,
        "mipro": validate_mipro_config_from_file,
    }
    validator = validators.get(algorithm)
    if validator is None:
        raise ValueError(f"Unknown algorithm: {algorithm}. Must be 'gepa' or 'mipro'")

    is_valid, errors = validator(config_path)
    if is_valid:
        return

    banner = "=" * 80
    error_msg = "\n".join(errors)
    raise ConfigValidationError(
        f"\n{banner}\n"
        f"❌ Config Validation Failed ({algorithm.upper()})\n"
        f"{banner}\n"
        f"{error_msg}\n"
        f"{banner}\n"
    )
+
2415
+
2416
# Public API of this module; star-imports and API linters rely on this list.
__all__ = [
    "ConfigValidationError",
    "validate_prompt_learning_config",
    "validate_prompt_learning_config_from_file",
    "validate_gepa_config_from_file",
    "validate_mipro_config_from_file",
    "validate_rl_config",
    "validate_sft_config",
]