synth-ai 0.2.8.dev2__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- synth_ai/__init__.py +44 -24
- synth_ai/__main__.py +30 -3
- synth_ai/cli/__init__.py +103 -48
- synth_ai/cli/__main__.py +42 -0
- synth_ai/cli/_internal/__init__.py +5 -0
- synth_ai/cli/_internal/modal_wrapper.py +31 -0
- synth_ai/cli/_internal/storage.py +20 -0
- synth_ai/cli/_internal/typer_patch.py +47 -0
- synth_ai/cli/_internal/validate_task_app.py +29 -0
- synth_ai/cli/agents/__init__.py +17 -0
- synth_ai/cli/agents/claude.py +77 -0
- synth_ai/cli/agents/codex.py +265 -0
- synth_ai/cli/agents/opencode.py +253 -0
- synth_ai/cli/commands/__init__.py +18 -0
- synth_ai/cli/commands/artifacts/__init__.py +13 -0
- synth_ai/cli/commands/artifacts/client.py +119 -0
- synth_ai/cli/commands/artifacts/config.py +57 -0
- synth_ai/cli/commands/artifacts/core.py +24 -0
- synth_ai/cli/commands/artifacts/download.py +188 -0
- synth_ai/cli/commands/artifacts/export.py +186 -0
- synth_ai/cli/commands/artifacts/list.py +156 -0
- synth_ai/cli/commands/artifacts/parsing.py +250 -0
- synth_ai/cli/commands/artifacts/show.py +336 -0
- synth_ai/cli/commands/demo/__init__.py +3 -0
- synth_ai/cli/commands/demo/core.py +153 -0
- synth_ai/cli/commands/eval/__init__.py +10 -0
- synth_ai/cli/commands/eval/config.py +338 -0
- synth_ai/cli/commands/eval/core.py +256 -0
- synth_ai/cli/commands/eval/runner.py +704 -0
- synth_ai/cli/commands/eval/validation.py +60 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +424 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +185 -0
- synth_ai/cli/commands/help/core.py +72 -0
- synth_ai/cli/commands/scan/__init__.py +19 -0
- synth_ai/cli/commands/scan/cloudflare_scanner.py +403 -0
- synth_ai/cli/commands/scan/core.py +344 -0
- synth_ai/cli/commands/scan/health_checker.py +242 -0
- synth_ai/cli/commands/scan/local_scanner.py +278 -0
- synth_ai/cli/commands/scan/models.py +83 -0
- synth_ai/cli/commands/smoke/__init__.py +7 -0
- synth_ai/cli/commands/smoke/core.py +1428 -0
- synth_ai/cli/commands/status/__init__.py +3 -0
- synth_ai/cli/commands/status/client.py +91 -0
- synth_ai/cli/commands/status/config.py +12 -0
- synth_ai/cli/commands/status/errors.py +11 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +3 -0
- synth_ai/cli/commands/status/subcommands/config.py +13 -0
- synth_ai/cli/commands/status/subcommands/files.py +34 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +51 -0
- synth_ai/cli/commands/status/subcommands/models.py +35 -0
- synth_ai/cli/commands/status/subcommands/runs.py +34 -0
- synth_ai/cli/commands/status/subcommands/session.py +77 -0
- synth_ai/cli/commands/status/subcommands/summary.py +39 -0
- synth_ai/cli/commands/status/subcommands/utils.py +41 -0
- synth_ai/cli/commands/status/utils.py +23 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +22 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +201 -0
- synth_ai/cli/commands/train/judge_validation.py +305 -0
- synth_ai/cli/commands/train/prompt_learning_validation.py +633 -0
- synth_ai/cli/commands/train/validation.py +392 -0
- synth_ai/cli/demo_apps/__init__.py +10 -0
- synth_ai/cli/demo_apps/core/__init__.py +28 -0
- synth_ai/{demos → cli/demo_apps}/core/cli.py +783 -441
- synth_ai/cli/demo_apps/crafter/__init__.py +1 -0
- synth_ai/cli/demo_apps/crafter/crafter_fft_4b.toml +55 -0
- synth_ai/cli/demo_apps/crafter/grpo_crafter_task_app.py +186 -0
- synth_ai/cli/demo_apps/crafter/rl_from_base_qwen4b.toml +74 -0
- synth_ai/cli/demo_apps/demo_registry.py +176 -0
- synth_ai/cli/demo_apps/demo_task_apps/__init__.py +7 -0
- synth_ai/{demos → cli/demo_apps}/demo_task_apps/core.py +75 -37
- synth_ai/cli/demo_apps/demo_task_apps/crafter/__init__.py +1 -0
- synth_ai/cli/demo_apps/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
- synth_ai/cli/demo_apps/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
- synth_ai/cli/demo_apps/demo_task_apps/crafter/grpo_crafter_task_app.py +185 -0
- synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/_common.py +1 -2
- synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/app.py +2 -1
- synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +73 -0
- synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/deploy_modal.py +3 -6
- synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +738 -0
- synth_ai/cli/demo_apps/demo_task_apps/math/task_app_entry.py +39 -0
- synth_ai/cli/demo_apps/math/__init__.py +1 -0
- synth_ai/cli/demo_apps/math/_common.py +16 -0
- synth_ai/cli/demo_apps/math/app.py +38 -0
- synth_ai/cli/demo_apps/math/config.toml +75 -0
- synth_ai/cli/demo_apps/math/deploy_modal.py +54 -0
- synth_ai/cli/demo_apps/math/modal_task_app.py +698 -0
- synth_ai/cli/demo_apps/math/task_app_entry.py +53 -0
- synth_ai/cli/demo_apps/mipro/main.py +271 -0
- synth_ai/cli/demo_apps/mipro/task_app.py +922 -0
- synth_ai/cli/demo_apps/mipro/train_cfg.toml +92 -0
- synth_ai/cli/demos/__init__.py +12 -0
- synth_ai/cli/demos/demo.py +32 -0
- synth_ai/cli/demos/rl_demo.py +254 -0
- synth_ai/cli/deploy.py +216 -0
- synth_ai/cli/infra/__init__.py +14 -0
- synth_ai/cli/{balance.py → infra/balance.py} +16 -4
- synth_ai/cli/infra/mcp.py +35 -0
- synth_ai/cli/infra/modal_app.py +36 -0
- synth_ai/cli/infra/setup.py +69 -0
- synth_ai/cli/infra/status.py +16 -0
- synth_ai/cli/infra/turso.py +77 -0
- synth_ai/cli/lib/__init__.py +10 -0
- synth_ai/cli/lib/agents.py +76 -0
- synth_ai/cli/lib/apps/modal_app.py +101 -0
- synth_ai/cli/lib/apps/task_app.py +642 -0
- synth_ai/cli/lib/bin.py +39 -0
- synth_ai/cli/lib/env.py +375 -0
- synth_ai/cli/lib/errors.py +85 -0
- synth_ai/cli/lib/modal.py +315 -0
- synth_ai/cli/lib/plotting.py +126 -0
- synth_ai/cli/lib/prompt_args.py +39 -0
- synth_ai/cli/lib/prompts.py +284 -0
- synth_ai/cli/lib/sqld.py +122 -0
- synth_ai/cli/lib/task_app_discovery.py +884 -0
- synth_ai/cli/lib/task_app_env.py +295 -0
- synth_ai/cli/lib/train_cfgs.py +300 -0
- synth_ai/cli/lib/tunnel_records.py +207 -0
- synth_ai/cli/local/__init__.py +14 -0
- synth_ai/cli/local/experiment_queue/__init__.py +72 -0
- synth_ai/cli/local/experiment_queue/api_schemas.py +221 -0
- synth_ai/cli/local/experiment_queue/celery_app.py +208 -0
- synth_ai/cli/local/experiment_queue/config.py +128 -0
- synth_ai/cli/local/experiment_queue/config_utils.py +272 -0
- synth_ai/cli/local/experiment_queue/database.py +175 -0
- synth_ai/cli/local/experiment_queue/dispatcher.py +119 -0
- synth_ai/cli/local/experiment_queue/models.py +231 -0
- synth_ai/cli/local/experiment_queue/progress_info.py +160 -0
- synth_ai/cli/local/experiment_queue/results.py +373 -0
- synth_ai/cli/local/experiment_queue/schemas.py +131 -0
- synth_ai/cli/local/experiment_queue/service.py +344 -0
- synth_ai/cli/local/experiment_queue/status.py +372 -0
- synth_ai/cli/local/experiment_queue/status_tracker.py +360 -0
- synth_ai/cli/local/experiment_queue/tasks.py +1984 -0
- synth_ai/cli/local/experiment_queue/trace_storage.py +65 -0
- synth_ai/cli/local/experiment_queue/validation.py +157 -0
- synth_ai/cli/local/session/__init__.py +92 -0
- synth_ai/cli/local/session/client.py +383 -0
- synth_ai/cli/local/session/constants.py +63 -0
- synth_ai/cli/local/session/exceptions.py +105 -0
- synth_ai/cli/local/session/manager.py +139 -0
- synth_ai/cli/local/session/models.py +89 -0
- synth_ai/cli/local/session/query.py +110 -0
- synth_ai/cli/root.py +150 -108
- synth_ai/cli/task_apps/__init__.py +37 -0
- synth_ai/cli/task_apps/commands.py +3145 -0
- synth_ai/cli/task_apps/deploy.py +7 -0
- synth_ai/cli/task_apps/list.py +26 -0
- synth_ai/cli/task_apps/main.py +36 -0
- synth_ai/cli/task_apps/modal_serve.py +11 -0
- synth_ai/cli/task_apps/serve.py +11 -0
- synth_ai/cli/training/__init__.py +8 -0
- synth_ai/cli/training/train.py +5 -0
- synth_ai/cli/training/train_cfg.py +34 -0
- synth_ai/cli/{watch.py → training/watch.py} +13 -18
- synth_ai/cli/turso.py +52 -0
- synth_ai/cli/utils/__init__.py +8 -0
- synth_ai/cli/utils/experiments.py +235 -0
- synth_ai/cli/utils/queue.py +504 -0
- synth_ai/cli/{recent.py → utils/recent.py} +13 -7
- synth_ai/cli/{traces.py → utils/traces.py} +9 -5
- synth_ai/contracts/__init__.py +67 -0
- synth_ai/core/__init__.py +100 -0
- synth_ai/core/_utils/__init__.py +54 -0
- synth_ai/core/_utils/base_url.py +10 -0
- synth_ai/core/_utils/http.py +10 -0
- synth_ai/core/_utils/prompts.py +14 -0
- synth_ai/core/_utils/task_app_state.py +12 -0
- synth_ai/core/_utils/user_config.py +10 -0
- synth_ai/core/apps/common.py +116 -0
- synth_ai/core/auth.py +95 -0
- synth_ai/core/cfgs.py +240 -0
- synth_ai/core/config/__init__.py +16 -0
- synth_ai/core/config/base.py +168 -0
- synth_ai/core/config/resolver.py +89 -0
- synth_ai/core/env.py +231 -0
- synth_ai/core/errors.py +126 -0
- synth_ai/core/http.py +230 -0
- synth_ai/core/integrations/__init__.py +11 -0
- synth_ai/core/integrations/cloudflare.py +1710 -0
- synth_ai/core/integrations/mcp/__init__.py +6 -0
- synth_ai/core/integrations/mcp/__main__.py +8 -0
- synth_ai/core/integrations/mcp/claude.py +36 -0
- synth_ai/core/integrations/mcp/main.py +254 -0
- synth_ai/core/integrations/mcp/setup.py +100 -0
- synth_ai/core/integrations/modal.py +277 -0
- synth_ai/core/json.py +72 -0
- synth_ai/core/log_filter.py +99 -0
- synth_ai/core/logging.py +82 -0
- synth_ai/core/paths.py +107 -0
- synth_ai/core/pricing.py +109 -0
- synth_ai/core/process.py +233 -0
- synth_ai/core/ssl.py +25 -0
- synth_ai/core/storage/__init__.py +71 -0
- synth_ai/core/task_app_state.py +318 -0
- synth_ai/core/telemetry.py +282 -0
- synth_ai/{tracing_v3 → core/tracing_v3}/__init__.py +5 -1
- synth_ai/{tracing_v3 → core/tracing_v3}/abstractions.py +21 -4
- synth_ai/core/tracing_v3/config.py +229 -0
- synth_ai/core/tracing_v3/constants.py +21 -0
- synth_ai/{tracing_v3 → core/tracing_v3}/db_config.py +42 -29
- synth_ai/{tracing_v3 → core/tracing_v3}/decorators.py +80 -45
- synth_ai/{tracing_v3 → core/tracing_v3}/examples/basic_usage.py +15 -9
- synth_ai/{tracing_v3 → core/tracing_v3}/hooks.py +6 -4
- synth_ai/{tracing_v3 → core/tracing_v3}/llm_call_record_helpers.py +161 -61
- synth_ai/{tracing_v3 → core/tracing_v3}/migration_helper.py +1 -2
- synth_ai/{tracing_v3 → core/tracing_v3}/replica_sync.py +12 -7
- synth_ai/core/tracing_v3/serialization.py +130 -0
- synth_ai/{tracing_v3 → core/tracing_v3}/session_tracer.py +88 -21
- synth_ai/{tracing_v3 → core/tracing_v3}/storage/base.py +99 -12
- synth_ai/core/tracing_v3/storage/config.py +109 -0
- synth_ai/{tracing_v3 → core/tracing_v3}/storage/factory.py +11 -9
- synth_ai/{tracing_v3 → core/tracing_v3}/storage/utils.py +15 -11
- synth_ai/core/tracing_v3/trace_utils.py +326 -0
- synth_ai/core/tracing_v3/turso/__init__.py +12 -0
- synth_ai/core/tracing_v3/turso/daemon.py +278 -0
- synth_ai/{tracing_v3 → core/tracing_v3}/turso/models.py +7 -3
- synth_ai/core/tracing_v3/turso/native_manager.py +1385 -0
- synth_ai/{tracing_v3 → core/tracing_v3}/utils.py +5 -4
- synth_ai/core/urls.py +18 -0
- synth_ai/core/user_config.py +137 -0
- synth_ai/core/uvicorn.py +222 -0
- synth_ai/data/__init__.py +83 -0
- synth_ai/data/enums.py +123 -0
- synth_ai/data/rewards.py +152 -0
- synth_ai/data/traces.py +35 -0
- synth_ai/products/__init__.py +6 -0
- synth_ai/products/graph_evolve/__init__.py +46 -0
- synth_ai/products/graph_evolve/client.py +226 -0
- synth_ai/products/graph_evolve/config.py +591 -0
- synth_ai/products/graph_evolve/converters/__init__.py +42 -0
- synth_ai/products/graph_evolve/converters/openai_sft.py +484 -0
- synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +109 -0
- synth_ai/products/graph_evolve/run.py +222 -0
- synth_ai/products/graph_gepa/__init__.py +23 -0
- synth_ai/products/graph_gepa/converters/__init__.py +19 -0
- synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
- synth_ai/sdk/__init__.py +123 -0
- synth_ai/sdk/api/__init__.py +1 -0
- synth_ai/sdk/api/models/supported.py +514 -0
- synth_ai/sdk/api/research_agent/__init__.py +296 -0
- synth_ai/sdk/api/train/__init__.py +85 -0
- synth_ai/sdk/api/train/builders.py +895 -0
- synth_ai/sdk/api/train/cli.py +2199 -0
- synth_ai/sdk/api/train/config_finder.py +267 -0
- synth_ai/sdk/api/train/configs/__init__.py +65 -0
- synth_ai/sdk/api/train/configs/prompt_learning.py +1706 -0
- synth_ai/sdk/api/train/configs/rl.py +187 -0
- synth_ai/sdk/api/train/configs/sft.py +99 -0
- synth_ai/sdk/api/train/configs/shared.py +81 -0
- synth_ai/sdk/api/train/context_learning.py +312 -0
- synth_ai/sdk/api/train/env_resolver.py +418 -0
- synth_ai/sdk/api/train/graph_validators.py +216 -0
- synth_ai/sdk/api/train/graphgen.py +984 -0
- synth_ai/sdk/api/train/graphgen_models.py +823 -0
- synth_ai/sdk/api/train/graphgen_validators.py +109 -0
- synth_ai/sdk/api/train/local_api.py +10 -0
- synth_ai/sdk/api/train/pollers.py +124 -0
- synth_ai/sdk/api/train/progress/__init__.py +97 -0
- synth_ai/sdk/api/train/progress/dataclasses.py +569 -0
- synth_ai/sdk/api/train/progress/events.py +326 -0
- synth_ai/sdk/api/train/progress/results.py +428 -0
- synth_ai/sdk/api/train/progress/tracker.py +641 -0
- synth_ai/sdk/api/train/prompt_learning.py +469 -0
- synth_ai/sdk/api/train/rl.py +441 -0
- synth_ai/sdk/api/train/sft.py +396 -0
- synth_ai/sdk/api/train/summary.py +522 -0
- synth_ai/sdk/api/train/supported_algos.py +147 -0
- synth_ai/sdk/api/train/task_app.py +351 -0
- synth_ai/sdk/api/train/utils.py +279 -0
- synth_ai/sdk/api/train/validators.py +2424 -0
- synth_ai/sdk/graphs/__init__.py +15 -0
- synth_ai/sdk/graphs/completions.py +570 -0
- synth_ai/{inference → sdk/inference}/__init__.py +0 -1
- synth_ai/sdk/inference/client.py +128 -0
- synth_ai/sdk/jobs/__init__.py +16 -0
- synth_ai/sdk/jobs/client.py +371 -0
- synth_ai/sdk/judging/__init__.py +14 -0
- synth_ai/sdk/judging/base.py +24 -0
- synth_ai/sdk/judging/client.py +40 -0
- synth_ai/sdk/judging/schemas.py +222 -0
- synth_ai/sdk/judging/types.py +42 -0
- synth_ai/sdk/learning/__init__.py +99 -0
- synth_ai/sdk/learning/algorithms.py +14 -0
- synth_ai/{learning → sdk/learning}/client.py +121 -30
- synth_ai/sdk/learning/config.py +5 -0
- synth_ai/{learning → sdk/learning}/constants.py +0 -2
- synth_ai/sdk/learning/context_learning_client.py +531 -0
- synth_ai/sdk/learning/context_learning_types.py +292 -0
- synth_ai/sdk/learning/ft_client.py +7 -0
- synth_ai/{learning → sdk/learning}/health.py +15 -9
- synth_ai/{learning → sdk/learning}/jobs.py +44 -47
- synth_ai/sdk/learning/prompt_extraction.py +334 -0
- synth_ai/sdk/learning/prompt_learning_client.py +455 -0
- synth_ai/sdk/learning/prompt_learning_types.py +186 -0
- synth_ai/{rl → sdk/learning/rl}/__init__.py +13 -8
- synth_ai/{learning/rl_client.py → sdk/learning/rl/client.py} +89 -77
- synth_ai/sdk/learning/rl/config.py +31 -0
- synth_ai/{rl → sdk/learning/rl}/contracts.py +5 -14
- synth_ai/{rl → sdk/learning/rl}/env_keys.py +45 -16
- synth_ai/sdk/learning/rl/secrets.py +13 -0
- synth_ai/sdk/learning/rl_client.py +5 -0
- synth_ai/sdk/learning/sft/__init__.py +29 -0
- synth_ai/sdk/learning/sft/client.py +95 -0
- synth_ai/sdk/learning/sft/config.py +270 -0
- synth_ai/sdk/learning/sft/data.py +698 -0
- synth_ai/sdk/learning/sse.py +57 -0
- synth_ai/sdk/learning/validators.py +52 -0
- synth_ai/sdk/localapi/__init__.py +40 -0
- synth_ai/sdk/localapi/apps/__init__.py +28 -0
- synth_ai/sdk/localapi/client.py +10 -0
- synth_ai/sdk/localapi/contracts.py +10 -0
- synth_ai/sdk/localapi/helpers.py +519 -0
- synth_ai/sdk/localapi/rollouts.py +87 -0
- synth_ai/sdk/localapi/server.py +29 -0
- synth_ai/sdk/localapi/template.py +70 -0
- synth_ai/sdk/streaming/__init__.py +35 -0
- synth_ai/sdk/streaming/config.py +94 -0
- synth_ai/sdk/streaming/handlers.py +1997 -0
- synth_ai/sdk/streaming/streamer.py +713 -0
- synth_ai/sdk/streaming/types.py +112 -0
- synth_ai/sdk/task/__init__.py +164 -0
- synth_ai/sdk/task/apps/__init__.py +169 -0
- synth_ai/sdk/task/auth.py +165 -0
- synth_ai/sdk/task/client.py +175 -0
- synth_ai/sdk/task/config.py +257 -0
- synth_ai/sdk/task/contracts.py +219 -0
- synth_ai/sdk/task/datasets.py +108 -0
- synth_ai/sdk/task/errors.py +50 -0
- synth_ai/sdk/task/health.py +34 -0
- synth_ai/sdk/task/in_process.py +1190 -0
- synth_ai/sdk/task/in_process_runner.py +314 -0
- synth_ai/sdk/task/inference_api.py +299 -0
- synth_ai/sdk/task/json.py +111 -0
- synth_ai/sdk/task/proxy.py +287 -0
- synth_ai/sdk/task/rubrics/__init__.py +55 -0
- synth_ai/sdk/task/rubrics/loaders.py +156 -0
- synth_ai/sdk/task/rubrics/models.py +57 -0
- synth_ai/sdk/task/rubrics/scoring.py +116 -0
- synth_ai/sdk/task/rubrics/strict.py +149 -0
- synth_ai/sdk/task/rubrics.py +219 -0
- synth_ai/sdk/task/server.py +631 -0
- synth_ai/sdk/task/trace_correlation_helpers.py +539 -0
- synth_ai/sdk/task/tracing_utils.py +95 -0
- synth_ai/sdk/task/validators.py +441 -0
- synth_ai/sdk/task/vendors.py +59 -0
- synth_ai/sdk/training/__init__.py +102 -0
- synth_ai/sdk/tunnels/__init__.py +83 -0
- synth_ai/sdk/tunnels/cleanup.py +83 -0
- synth_ai/sdk/tunnels/ports.py +120 -0
- synth_ai/utils/__init__.py +213 -0
- synth_ai-0.4.3.dist-info/METADATA +262 -0
- synth_ai-0.4.3.dist-info/RECORD +370 -0
- {synth_ai-0.2.8.dev2.dist-info → synth_ai-0.4.3.dist-info}/entry_points.txt +0 -1
- synth_ai/cli/calc.py +0 -69
- synth_ai/cli/demo.py +0 -144
- synth_ai/cli/legacy_root_backup.py +0 -470
- synth_ai/cli/man.py +0 -106
- synth_ai/cli/rl_demo.py +0 -202
- synth_ai/cli/status.py +0 -133
- synth_ai/config/base_url.py +0 -107
- synth_ai/core/experiment.py +0 -15
- synth_ai/core/system.py +0 -15
- synth_ai/demos/core/__init__.py +0 -1
- synth_ai/demos/demo_task_apps/__init__.py +0 -1
- synth_ai/demos/demo_task_apps/math/config.toml +0 -129
- synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +0 -22
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +0 -415
- synth_ai/environments/__init__.py +0 -31
- synth_ai/environments/environment/__init__.py +0 -1
- synth_ai/environments/environment/artifacts/__init__.py +0 -1
- synth_ai/environments/environment/artifacts/base.py +0 -52
- synth_ai/environments/environment/core.py +0 -67
- synth_ai/environments/environment/db/__init__.py +0 -1
- synth_ai/environments/environment/db/sqlite.py +0 -45
- synth_ai/environments/environment/registry.py +0 -233
- synth_ai/environments/environment/resources/sqlite.py +0 -45
- synth_ai/environments/environment/results.py +0 -1
- synth_ai/environments/environment/rewards/__init__.py +0 -1
- synth_ai/environments/environment/rewards/core.py +0 -29
- synth_ai/environments/environment/shared_engine.py +0 -26
- synth_ai/environments/environment/tools/__init__.py +0 -200
- synth_ai/environments/examples/__init__.py +0 -1
- synth_ai/environments/examples/bandit/__init__.py +0 -33
- synth_ai/environments/examples/bandit/engine.py +0 -294
- synth_ai/environments/examples/bandit/environment.py +0 -194
- synth_ai/environments/examples/bandit/taskset.py +0 -200
- synth_ai/environments/examples/crafter_classic/__init__.py +0 -8
- synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +0 -250
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +0 -59
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +0 -152
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +0 -24
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +0 -1194
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +0 -56
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +0 -32
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +0 -738
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +0 -384
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +0 -53
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +0 -178
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +0 -222
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +0 -183
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +0 -210
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +0 -206
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +0 -49
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +0 -64
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +0 -88
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +0 -77
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +0 -324
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +0 -580
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +0 -362
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +0 -49
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +0 -332
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +0 -97
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +0 -217
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +0 -87
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +0 -88
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +0 -195
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +0 -400
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +0 -195
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +0 -56
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +0 -858
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +0 -52
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +0 -874
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +0 -1412
- synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +0 -216
- synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +0 -296
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +0 -58
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +0 -464
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +0 -152
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +0 -51
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +0 -1412
- synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +0 -112
- synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +0 -203
- synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +0 -305
- synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +0 -126
- synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +0 -94
- synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +0 -142
- synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +0 -26
- synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +0 -984
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +0 -724
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +0 -386
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +0 -205
- synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +0 -150
- synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +0 -283
- synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +0 -280
- synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +0 -456
- synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +0 -166
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +0 -102
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +0 -128
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +0 -655
- synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +0 -202
- synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +0 -166
- synth_ai/environments/examples/crafter_classic/config_logging.py +0 -111
- synth_ai/environments/examples/crafter_classic/debug_translation.py +0 -0
- synth_ai/environments/examples/crafter_classic/engine.py +0 -579
- synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +0 -64
- synth_ai/environments/examples/crafter_classic/engine_helpers/action_map.py +0 -6
- synth_ai/environments/examples/crafter_classic/engine_helpers/serialization.py +0 -75
- synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +0 -267
- synth_ai/environments/examples/crafter_classic/environment.py +0 -404
- synth_ai/environments/examples/crafter_classic/taskset.py +0 -233
- synth_ai/environments/examples/crafter_classic/trace_hooks_v3.py +0 -228
- synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +0 -299
- synth_ai/environments/examples/crafter_custom/__init__.py +0 -4
- synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +0 -1
- synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +0 -202
- synth_ai/environments/examples/crafter_custom/crafter/__init__.py +0 -7
- synth_ai/environments/examples/crafter_custom/crafter/config.py +0 -182
- synth_ai/environments/examples/crafter_custom/crafter/constants.py +0 -8
- synth_ai/environments/examples/crafter_custom/crafter/engine.py +0 -269
- synth_ai/environments/examples/crafter_custom/crafter/env.py +0 -262
- synth_ai/environments/examples/crafter_custom/crafter/objects.py +0 -417
- synth_ai/environments/examples/crafter_custom/crafter/recorder.py +0 -187
- synth_ai/environments/examples/crafter_custom/crafter/worldgen.py +0 -118
- synth_ai/environments/examples/crafter_custom/dataset_builder.py +0 -373
- synth_ai/environments/examples/crafter_custom/environment.py +0 -312
- synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +0 -159
- synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +0 -158
- synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +0 -71
- synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +0 -105
- synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +0 -119
- synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +0 -52
- synth_ai/environments/examples/crafter_custom/run_dataset.py +0 -305
- synth_ai/environments/examples/enron/art_helpers/email_search_tools.py +0 -156
- synth_ai/environments/examples/enron/art_helpers/local_email_db.py +0 -281
- synth_ai/environments/examples/enron/art_helpers/types_enron.py +0 -25
- synth_ai/environments/examples/enron/engine.py +0 -295
- synth_ai/environments/examples/enron/environment.py +0 -166
- synth_ai/environments/examples/enron/taskset.py +0 -112
- synth_ai/environments/examples/enron/units/keyword_stats.py +0 -112
- synth_ai/environments/examples/minigrid/__init__.py +0 -48
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +0 -1188
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +0 -48
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +0 -562
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +0 -221
- synth_ai/environments/examples/minigrid/engine.py +0 -589
- synth_ai/environments/examples/minigrid/environment.py +0 -274
- synth_ai/environments/examples/minigrid/environment_mapping.py +0 -242
- synth_ai/environments/examples/minigrid/puzzle_loader.py +0 -417
- synth_ai/environments/examples/minigrid/taskset.py +0 -583
- synth_ai/environments/examples/nethack/__init__.py +0 -7
- synth_ai/environments/examples/nethack/achievements.py +0 -337
- synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +0 -981
- synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +0 -74
- synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +0 -831
- synth_ai/environments/examples/nethack/engine.py +0 -739
- synth_ai/environments/examples/nethack/environment.py +0 -256
- synth_ai/environments/examples/nethack/helpers/__init__.py +0 -41
- synth_ai/environments/examples/nethack/helpers/action_mapping.py +0 -301
- synth_ai/environments/examples/nethack/helpers/nle_wrapper.py +0 -402
- synth_ai/environments/examples/nethack/helpers/observation_utils.py +0 -433
- synth_ai/environments/examples/nethack/helpers/recording_wrapper.py +0 -200
- synth_ai/environments/examples/nethack/helpers/trajectory_recorder.py +0 -269
- synth_ai/environments/examples/nethack/helpers/visualization/replay_viewer.py +0 -308
- synth_ai/environments/examples/nethack/helpers/visualization/visualizer.py +0 -431
- synth_ai/environments/examples/nethack/taskset.py +0 -323
- synth_ai/environments/examples/red/__init__.py +0 -7
- synth_ai/environments/examples/red/agent_demos/__init__.py +0 -1
- synth_ai/environments/examples/red/config_logging.py +0 -110
- synth_ai/environments/examples/red/engine.py +0 -694
- synth_ai/environments/examples/red/engine_helpers/__init__.py +0 -1
- synth_ai/environments/examples/red/engine_helpers/memory_map.py +0 -28
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +0 -276
- synth_ai/environments/examples/red/engine_helpers/reward_library/__init__.py +0 -142
- synth_ai/environments/examples/red/engine_helpers/reward_library/adaptive_rewards.py +0 -57
- synth_ai/environments/examples/red/engine_helpers/reward_library/battle_rewards.py +0 -284
- synth_ai/environments/examples/red/engine_helpers/reward_library/composite_rewards.py +0 -150
- synth_ai/environments/examples/red/engine_helpers/reward_library/economy_rewards.py +0 -138
- synth_ai/environments/examples/red/engine_helpers/reward_library/efficiency_rewards.py +0 -57
- synth_ai/environments/examples/red/engine_helpers/reward_library/exploration_rewards.py +0 -331
- synth_ai/environments/examples/red/engine_helpers/reward_library/novelty_rewards.py +0 -121
- synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_rewards.py +0 -559
- synth_ai/environments/examples/red/engine_helpers/reward_library/pokemon_rewards.py +0 -313
- synth_ai/environments/examples/red/engine_helpers/reward_library/social_rewards.py +0 -148
- synth_ai/environments/examples/red/engine_helpers/reward_library/story_rewards.py +0 -247
- synth_ai/environments/examples/red/engine_helpers/screen_analysis.py +0 -368
- synth_ai/environments/examples/red/engine_helpers/state_extraction.py +0 -140
- synth_ai/environments/examples/red/environment.py +0 -238
- synth_ai/environments/examples/red/taskset.py +0 -79
- synth_ai/environments/examples/red/units/__init__.py +0 -1
- synth_ai/environments/examples/sokoban/__init__.py +0 -1
- synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +0 -899
- synth_ai/environments/examples/sokoban/engine.py +0 -678
- synth_ai/environments/examples/sokoban/engine_helpers/__init__.py +0 -1
- synth_ai/environments/examples/sokoban/engine_helpers/room_utils.py +0 -657
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/__init__.py +0 -18
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/__init__.py +0 -3
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/boxoban_env.py +0 -131
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/render_utils.py +0 -370
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/room_utils.py +0 -332
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env.py +0 -306
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_fixed_targets.py +0 -67
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_pull.py +0 -115
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_two_player.py +0 -123
- synth_ai/environments/examples/sokoban/engine_helpers/vendored/envs/sokoban_env_variations.py +0 -394
- synth_ai/environments/examples/sokoban/environment.py +0 -229
- synth_ai/environments/examples/sokoban/generate_verified_puzzles.py +0 -440
- synth_ai/environments/examples/sokoban/puzzle_loader.py +0 -312
- synth_ai/environments/examples/sokoban/taskset.py +0 -428
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/environments/examples/tictactoe/__init__.py +0 -1
- synth_ai/environments/examples/tictactoe/engine.py +0 -368
- synth_ai/environments/examples/tictactoe/environment.py +0 -240
- synth_ai/environments/examples/tictactoe/taskset.py +0 -215
- synth_ai/environments/examples/verilog/__init__.py +0 -10
- synth_ai/environments/examples/verilog/engine.py +0 -329
- synth_ai/environments/examples/verilog/environment.py +0 -350
- synth_ai/environments/examples/verilog/taskset.py +0 -420
- synth_ai/environments/examples/wordle/__init__.py +0 -29
- synth_ai/environments/examples/wordle/engine.py +0 -398
- synth_ai/environments/examples/wordle/environment.py +0 -159
- synth_ai/environments/examples/wordle/helpers/generate_instances_wordfreq.py +0 -75
- synth_ai/environments/examples/wordle/taskset.py +0 -230
- synth_ai/environments/reproducibility/core.py +0 -42
- synth_ai/environments/reproducibility/helpers.py +0 -0
- synth_ai/environments/reproducibility/tree.py +0 -364
- synth_ai/environments/service/app.py +0 -98
- synth_ai/environments/service/core_routes.py +0 -1020
- synth_ai/environments/service/external_registry.py +0 -56
- synth_ai/environments/service/registry.py +0 -9
- synth_ai/environments/stateful/__init__.py +0 -1
- synth_ai/environments/stateful/core.py +0 -163
- synth_ai/environments/stateful/engine.py +0 -21
- synth_ai/environments/stateful/state.py +0 -7
- synth_ai/environments/tasks/api.py +0 -19
- synth_ai/environments/tasks/core.py +0 -80
- synth_ai/environments/tasks/filters.py +0 -41
- synth_ai/environments/tasks/utils.py +0 -91
- synth_ai/environments/v0_observability/history.py +0 -3
- synth_ai/environments/v0_observability/log.py +0 -2
- synth_ai/evals/base.py +0 -15
- synth_ai/experimental/synth_oss.py +0 -446
- synth_ai/handshake.py +0 -63
- synth_ai/http.py +0 -26
- synth_ai/http_client.py +0 -104
- synth_ai/inference/client.py +0 -20
- synth_ai/install_sqld.sh +0 -40
- synth_ai/jobs/client.py +0 -246
- synth_ai/learning/__init__.py +0 -24
- synth_ai/learning/config.py +0 -43
- synth_ai/learning/filtering.py +0 -0
- synth_ai/learning/ft_client.py +0 -59
- synth_ai/learning/offline/dpo.py +0 -0
- synth_ai/learning/offline/providers.py +0 -7
- synth_ai/learning/offline/sft.py +0 -0
- synth_ai/learning/offline/shared.py +0 -0
- synth_ai/learning/online/grpo.py +0 -0
- synth_ai/learning/online/irft.py +0 -0
- synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
- synth_ai/learning/prompts/gepa.py +0 -0
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
- synth_ai/learning/prompts/mipro.py +0 -289
- synth_ai/learning/prompts/random_search.py +0 -246
- synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
- synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
- synth_ai/learning/sse.py +0 -58
- synth_ai/learning/validators.py +0 -48
- synth_ai/lm/__init__.py +0 -51
- synth_ai/lm/caching/constants.py +0 -6
- synth_ai/lm/caching/dbs.py +0 -0
- synth_ai/lm/caching/ephemeral.py +0 -102
- synth_ai/lm/caching/handler.py +0 -137
- synth_ai/lm/caching/initialize.py +0 -11
- synth_ai/lm/caching/persistent.py +0 -114
- synth_ai/lm/config.py +0 -110
- synth_ai/lm/constants.py +0 -32
- synth_ai/lm/core/__init__.py +0 -8
- synth_ai/lm/core/all.py +0 -73
- synth_ai/lm/core/exceptions.py +0 -7
- synth_ai/lm/core/main.py +0 -319
- synth_ai/lm/core/main_v3.py +0 -594
- synth_ai/lm/core/synth_models.py +0 -48
- synth_ai/lm/core/vendor_clients.py +0 -188
- synth_ai/lm/cost/__init__.py +0 -0
- synth_ai/lm/cost/monitor.py +0 -1
- synth_ai/lm/cost/statefulness.py +0 -1
- synth_ai/lm/injection.py +0 -80
- synth_ai/lm/overrides.py +0 -206
- synth_ai/lm/provider_support/__init__.py +0 -8
- synth_ai/lm/provider_support/anthropic.py +0 -972
- synth_ai/lm/provider_support/openai.py +0 -1139
- synth_ai/lm/provider_support/suppress_logging.py +0 -31
- synth_ai/lm/structured_outputs/__init__.py +0 -0
- synth_ai/lm/structured_outputs/handler.py +0 -440
- synth_ai/lm/structured_outputs/inject.py +0 -297
- synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
- synth_ai/lm/tools/__init__.py +0 -3
- synth_ai/lm/tools/base.py +0 -172
- synth_ai/lm/unified_interface.py +0 -202
- synth_ai/lm/vendors/__init__.py +0 -0
- synth_ai/lm/vendors/base.py +0 -81
- synth_ai/lm/vendors/core/__init__.py +0 -0
- synth_ai/lm/vendors/core/anthropic_api.py +0 -387
- synth_ai/lm/vendors/core/gemini_api.py +0 -292
- synth_ai/lm/vendors/core/mistral_api.py +0 -322
- synth_ai/lm/vendors/core/openai_api.py +0 -225
- synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
- synth_ai/lm/vendors/local/__init__.py +0 -0
- synth_ai/lm/vendors/local/ollama.py +0 -0
- synth_ai/lm/vendors/openai_standard.py +0 -780
- synth_ai/lm/vendors/openai_standard_responses.py +0 -256
- synth_ai/lm/vendors/retries.py +0 -22
- synth_ai/lm/vendors/supported/__init__.py +0 -0
- synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
- synth_ai/lm/vendors/supported/deepseek.py +0 -69
- synth_ai/lm/vendors/supported/grok.py +0 -75
- synth_ai/lm/vendors/supported/groq.py +0 -16
- synth_ai/lm/vendors/supported/ollama.py +0 -15
- synth_ai/lm/vendors/supported/openrouter.py +0 -74
- synth_ai/lm/vendors/supported/together.py +0 -11
- synth_ai/lm/vendors/synth_client.py +0 -808
- synth_ai/lm/warmup.py +0 -186
- synth_ai/rl/secrets.py +0 -19
- synth_ai/scripts/verify_rewards.py +0 -100
- synth_ai/task/__init__.py +0 -10
- synth_ai/task/contracts.py +0 -120
- synth_ai/task/health.py +0 -28
- synth_ai/task/validators.py +0 -12
- synth_ai/tracing/__init__.py +0 -30
- synth_ai/tracing_v1/__init__.py +0 -33
- synth_ai/tracing_v3/config.py +0 -84
- synth_ai/tracing_v3/storage/config.py +0 -62
- synth_ai/tracing_v3/turso/__init__.py +0 -25
- synth_ai/tracing_v3/turso/daemon.py +0 -144
- synth_ai/tracing_v3/turso/manager.py +0 -760
- synth_ai/v0/tracing/__init__.py +0 -0
- synth_ai/v0/tracing/abstractions.py +0 -224
- synth_ai/v0/tracing/base_client.py +0 -91
- synth_ai/v0/tracing/client_manager.py +0 -131
- synth_ai/v0/tracing/config.py +0 -142
- synth_ai/v0/tracing/context.py +0 -146
- synth_ai/v0/tracing/decorators.py +0 -682
- synth_ai/v0/tracing/events/__init__.py +0 -0
- synth_ai/v0/tracing/events/manage.py +0 -147
- synth_ai/v0/tracing/events/scope.py +0 -86
- synth_ai/v0/tracing/events/store.py +0 -228
- synth_ai/v0/tracing/immediate_client.py +0 -151
- synth_ai/v0/tracing/local.py +0 -18
- synth_ai/v0/tracing/log_client_base.py +0 -73
- synth_ai/v0/tracing/retry_queue.py +0 -186
- synth_ai/v0/tracing/trackers.py +0 -515
- synth_ai/v0/tracing/upload.py +0 -512
- synth_ai/v0/tracing/utils.py +0 -9
- synth_ai/v0/tracing_v1/__init__.py +0 -16
- synth_ai/v0/tracing_v1/abstractions.py +0 -224
- synth_ai/v0/tracing_v1/base_client.py +0 -91
- synth_ai/v0/tracing_v1/client_manager.py +0 -131
- synth_ai/v0/tracing_v1/config.py +0 -142
- synth_ai/v0/tracing_v1/context.py +0 -146
- synth_ai/v0/tracing_v1/decorators.py +0 -703
- synth_ai/v0/tracing_v1/events/__init__.py +0 -0
- synth_ai/v0/tracing_v1/events/manage.py +0 -147
- synth_ai/v0/tracing_v1/events/scope.py +0 -86
- synth_ai/v0/tracing_v1/events/store.py +0 -228
- synth_ai/v0/tracing_v1/immediate_client.py +0 -151
- synth_ai/v0/tracing_v1/local.py +0 -18
- synth_ai/v0/tracing_v1/log_client_base.py +0 -73
- synth_ai/v0/tracing_v1/retry_queue.py +0 -186
- synth_ai/v0/tracing_v1/trackers.py +0 -515
- synth_ai/v0/tracing_v1/upload.py +0 -527
- synth_ai/v0/tracing_v1/utils.py +0 -9
- synth_ai/zyk/__init__.py +0 -30
- synth_ai-0.2.8.dev2.dist-info/METADATA +0 -129
- synth_ai-0.2.8.dev2.dist-info/RECORD +0 -420
- /synth_ai/{demos → cli/demo_apps}/demo_task_apps/math/__init__.py +0 -0
- /synth_ai/{lm/caching → core/apps}/__init__.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/lm_call_record_abstractions.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/storage/__init__.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/storage/exceptions.py +0 -0
- /synth_ai/{tracing_v3 → core/tracing_v3}/storage/types.py +0 -0
- /synth_ai/{compound/cais.py → py.typed} +0 -0
- /synth_ai/{learning → sdk/learning}/core.py +0 -0
- /synth_ai/{learning → sdk/learning}/gateway.py +0 -0
- {synth_ai-0.2.8.dev2.dist-info → synth_ai-0.4.3.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.8.dev2.dist-info → synth_ai-0.4.3.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.8.dev2.dist-info → synth_ai-0.4.3.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,1706 @@
|
|
|
1
|
+
"""Prompt Learning configuration models for MIPRO and GEPA."""
|
|
2
|
+
from __future__ import annotations
|
|
3
|
+
|
|
4
|
+
from collections.abc import Mapping, Sequence
|
|
5
|
+
from enum import Enum
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any, Dict, Literal, Optional
|
|
8
|
+
|
|
9
|
+
from pydantic import Field, field_validator, model_validator
|
|
10
|
+
|
|
11
|
+
from ..utils import load_toml
|
|
12
|
+
from .shared import ExtraModel
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class SeedRange(ExtraModel):
    """Inclusive-start, exclusive-end seed range for TOML configs.

    Lets a config say ``seeds = {start = 0, end = 50}`` instead of spelling
    out ``seeds = [0, 1, 2, ..., 49]``.

    Examples:
        seeds = {start = 0, end = 10}             # [0, 1, ..., 9]
        seeds = {start = 0, end = 100, step = 2}  # [0, 2, ..., 98]
    """
    start: int
    end: int
    step: int = 1

    def to_list(self) -> list[int]:
        """Expand the range into an explicit list of seed integers."""
        return [*range(self.start, self.end, self.step)]
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
def _parse_seeds(value: Any) -> list[int] | None:
|
|
34
|
+
"""Parse seed values that can be either a list or a range dict.
|
|
35
|
+
|
|
36
|
+
Args:
|
|
37
|
+
value: Either a list of ints or a dict with 'start', 'end', and optional 'step'.
|
|
38
|
+
|
|
39
|
+
Returns:
|
|
40
|
+
List of integers, or None if value is None.
|
|
41
|
+
|
|
42
|
+
Examples:
|
|
43
|
+
_parse_seeds([0, 1, 2, 3]) # [0, 1, 2, 3]
|
|
44
|
+
_parse_seeds({"start": 0, "end": 4}) # [0, 1, 2, 3]
|
|
45
|
+
_parse_seeds({"start": 0, "end": 10, "step": 2}) # [0, 2, 4, 6, 8]
|
|
46
|
+
"""
|
|
47
|
+
if value is None:
|
|
48
|
+
return None
|
|
49
|
+
if isinstance(value, dict) and "start" in value and "end" in value:
|
|
50
|
+
seed_range = SeedRange.model_validate(value)
|
|
51
|
+
return seed_range.to_list()
|
|
52
|
+
if isinstance(value, list):
|
|
53
|
+
return list(value)
|
|
54
|
+
raise ValueError(f"Seeds must be a list or a range dict with 'start' and 'end' keys, got {type(value).__name__}")
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
class InferenceMode(str, Enum):
    """How policy inference is served; only Synth-hosted inference exists."""

    synth_hosted = "synth_hosted"
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
class ProviderName(str, Enum):
    """Model providers accepted for prompt-learning policies."""

    openai = "openai"
    groq = "groq"
    google = "google"
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class PromptLearningPolicyConfig(ExtraModel):
    """Policy settings for prompt learning: which model runs and how."""
    model: str
    provider: ProviderName
    inference_url: str | None = None  # Optional - trainer provides it in rollout requests (ignored if present)
    inference_mode: InferenceMode = InferenceMode.synth_hosted
    temperature: float = 0.0
    max_completion_tokens: int = 512
    policy_name: str | None = None

    @field_validator("inference_url", mode="before")
    @classmethod
    def _strip_inference_url(cls, v: str | None) -> str | None:
        """Trim surrounding whitespace and reject empty or non-HTTP URLs."""
        if v is None:
            return None
        cleaned = v.strip() if isinstance(v, str) else v
        # Empty (after stripping) and non-http(s) values are both invalid.
        if not cleaned or not cleaned.startswith(("http://", "https://")):
            raise ValueError("inference_url must start with http:// or https://")
        return cleaned
|
|
92
|
+
|
|
93
|
+
|
|
94
|
+
class MessagePatternConfig(ExtraModel):
    """Configuration for a single message pattern."""
    # Message role label for this pattern.
    role: str
    # Template text of the message.
    pattern: str
    # Relative ordering of this message among a pattern's messages.
    order: int = 0
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
class PromptPatternConfig(ExtraModel):
    """Initial prompt pattern configuration.

    Describes a starting prompt: an ordered list of message templates plus
    wildcard substitutions keyed by name.
    """
    # Optional stable identifier for this pattern.
    id: str | None = None
    # Optional human-readable name.
    name: str | None = None
    # Message templates making up the prompt. Use default_factory for the
    # default, consistent with ``wildcards`` below (fresh container per model
    # instead of a mutable class-level literal).
    messages: list[MessagePatternConfig] = Field(default_factory=list)
    # Wildcard name -> replacement text substituted into message patterns.
    wildcards: dict[str, str] = Field(default_factory=dict)
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
class MIPROMetaConfig(ExtraModel):
    """DEPRECATED: Meta-model config is now controlled by proposer_effort and proposer_output_tokens.

    This class is kept for backwards compatibility but should not be used.
    Use proposer_effort (LOW_CONTEXT, LOW, MEDIUM, HIGH) and proposer_output_tokens (RAPID, FAST, SLOW) instead.
    """
    # All fields are optional and retained only so legacy configs still parse.
    model: str | None = None
    provider: str | None = None
    inference_url: str | None = None
    temperature: float | None = None
    max_tokens: int | None = None
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class MIPROStageConfig(ExtraModel):
    """Configuration for a single MIPRO stage inside a module.

    Each stage MUST have its own policy configuration. The policy field is required
    and must include 'model' and 'provider' fields.
    """
    # Identifier for this stage within its module.
    stage_id: str
    # Baseline instruction text for the stage.
    baseline_instruction: str
    # Optional baseline chat messages (role/content string dicts).
    baseline_messages: list[dict[str, str]] = Field(default_factory=list)
    # Optional caps on instruction / demo slots; None leaves them unset here.
    max_instruction_slots: int | None = None
    max_demo_slots: int | None = None
    # Required per-stage policy; accepts a parsed model or a raw dict from TOML.
    policy: PromptLearningPolicyConfig | dict[str, Any] = Field(
        ...,
        description="Required per-stage policy configuration. Must include 'model' and 'provider' fields."
    )
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
class MIPROModuleConfig(ExtraModel):
    """Configuration for a single module in a MIPRO pipeline."""
    # Identifier for this module.
    module_id: str
    # Stage configurations belonging to this module.
    stages: list[MIPROStageConfig] = Field(default_factory=list)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
class MIPROSeedConfig(ExtraModel):
    """Seed pools for bootstrap, online optimization, held-out test, and reference runs."""
    bootstrap: list[int] = Field(default_factory=list)
    online: list[int] = Field(default_factory=list)
    test: list[int] = Field(default_factory=list)
    reference: list[int] = Field(default_factory=list)

    @field_validator("bootstrap", "online", "test", "reference", mode="before")
    @classmethod
    def _parse_seed_pools(cls, v: Any) -> list[int]:
        """Accept either an explicit seed list or a {start, end[, step]} range dict."""
        parsed = _parse_seeds(v)
        return [] if not parsed else parsed
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class PromptLearningJudgeConfig(ExtraModel):
    """Verifier configuration shared by GEPA and MIPRO.

    Configures LLM-based evaluation of agent trajectories during prompt
    optimization, using either standard rubrics or registered Verifier Graphs.

    Attributes:
        enabled: Turn verifier-based scoring on or off.
        reward_source: Where the optimization reward comes from:
            - "task_app": environment rewards from the task app only (default).
            - "judge": verifier quality scores only.
            - "fused": weighted combination of environment and verifier rewards.
        backend_base: Base URL of the verifier service (e.g. "https://api.usesynth.ai").
        backend_api_key_env: Env var holding the Synth API key (default: "SYNTH_API_KEY").
        backend_provider: Provider for the verifier model (e.g. "openai", "groq").
        backend_model: Model that executes the verifier rubric or graph (e.g. "gpt-4o-mini").
        synth_verifier_id: ID or name of a registered Verifier Graph or Rubric on
            the backend; points at a specific, versioned verifier artifact.
        backend_rubric_id: Legacy alias for synth_verifier_id (kept in sync below).
        backend_event_enabled: Enable fine-grained event-level scoring.
        backend_outcome_enabled: Enable episode-level outcome scoring.
        weight_env: Weight of environment rewards in "fused" mode (default: 1.0).
        weight_event: Weight of verifier event rewards in "fused" mode (default: 0.0).
        weight_outcome: Weight of verifier outcome rewards in "fused" mode (default: 0.0).
    """
    enabled: bool = False
    reward_source: Literal["task_app", "judge", "fused"] = "task_app"
    backend_base: str = ""
    backend_api_key_env: str = "SYNTH_API_KEY"
    backend_provider: str = ""
    backend_model: str = ""
    synth_verifier_id: str = ""  # Preferred field for Registered VerifierGraph or Rubric ID
    backend_rubric_id: str = ""  # Legacy alias for synth_verifier_id
    backend_event_enabled: bool = True
    backend_outcome_enabled: bool = True
    backend_options: Dict[str, Any] = Field(default_factory=dict)
    concurrency: int = 8
    timeout: float = 60.0
    weight_env: float = 1.0
    weight_event: float = 0.0
    weight_outcome: float = 0.0
    spec_path: Optional[str] = None
    spec_max_tokens: int = 5000
    spec_context: Optional[str] = None

    @model_validator(mode="before")
    @classmethod
    def _sync_verifier_ids(cls, data: Any) -> Any:
        """Mirror whichever of synth_verifier_id / backend_rubric_id was provided."""
        if isinstance(data, dict):
            preferred = data.get("synth_verifier_id")
            legacy = data.get("backend_rubric_id")
            if legacy and not preferred:
                data["synth_verifier_id"] = legacy
            elif preferred and not legacy:
                data["backend_rubric_id"] = preferred
        return data
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
class PromptLearningVerifierConfig(PromptLearningJudgeConfig):
    """Verifier-terminology alias of PromptLearningJudgeConfig; identical behavior."""
|
|
218
|
+
|
|
219
|
+
|
|
220
|
+
class ProxyModelsConfig(ExtraModel):
    """Configuration for proxy usage on policy evaluations.

    Uses a low-fidelity (LO) model for most evaluations and a high-fidelity (HI) model
    for verification, with dynamic switching based on calibration and correlation.

    The proxy system starts by evaluating examples with both HI and LO models to build
    a calibration regression. Once calibrated (R² >= r2_thresh), it switches to using
    only the LO model for most evaluations, falling back to HI when reliability drops.

    Attributes:
        hi_provider: Provider for high-fidelity model (e.g., "openai", "groq", "google").
            This is the expensive model used for ground-truth evaluations.
        hi_model: High-fidelity model name (e.g., "gpt-4o", "gpt-oss-120b").
            Must be a supported model for the provider.
        lo_provider: Provider for low-fidelity proxy model (e.g., "groq", "openai").
            This is the cheaper model used for most evaluations after calibration.
        lo_model: Low-fidelity proxy model name (e.g., "gpt-oss-20b", "gpt-4o-mini").
            Must be a supported model for the provider. Should be cheaper than hi_model.
        n_min_hi: Minimum number of HI evaluations before allowing proxy substitution.
            Default: 5. Ensures sufficient calibration data before proxying.
        r2_thresh: R² correlation threshold (0.0-1.0) required to enable proxying.
            Default: 0.5. Higher values require stronger correlation before proxying.
        r2_stop: R² threshold (0.0-1.0) below which proxying is disabled.
            Default: 0.2. If correlation drops below this, revert to HI-only.
        sigma_max: Maximum residual variance (sigma²) allowed for proxy calibration.
            Default: 1e6. Higher values allow more variance in predictions.
        sigma_stop: Stop proxying if residual variance exceeds this value.
            Default: 1e9. If variance exceeds this, revert to HI-only.
        verify_every: Periodically verify calibration every N LO-only evaluations.
            Default: 0 (no periodic verification). Set to >0 to periodically run BOTH
            to check if calibration is still valid.
        proxy_patience_usd: Stop proxying if cumulative net gain drops below this (USD).
            Default: -100.0. Negative values allow some loss before stopping. Set to 0.0
            to stop immediately if proxy becomes unprofitable.
    """
    # High-fidelity (ground-truth) model.
    hi_provider: str
    hi_model: str
    # Low-fidelity (cheap proxy) model.
    lo_provider: str
    lo_model: str
    # Calibration / switching thresholds; semantics documented in Attributes above.
    n_min_hi: int = 5
    r2_thresh: float = 0.5
    r2_stop: float = 0.2
    sigma_max: float = 1e6
    sigma_stop: float = 1e9
    verify_every: int = 0
    proxy_patience_usd: float = -100.0
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
class AdaptiveCurriculumLevel(str, Enum):
    """Preset intensity levels for the adaptive pooling curriculum."""

    NONE = "NONE"  # adaptive pooling disabled
    LOW = "LOW"
    MODERATE = "MODERATE"
    HIGH = "HIGH"
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
class AdaptivePoolConfig(ExtraModel):
    """Configuration for adaptive pooling (dynamically adjusting evaluation pool size).

    Reduces evaluation costs by focusing on the most informative examples while
    maintaining optimization quality through informativeness-based selection.

    The adaptive pool starts with a larger pool and gradually reduces to a minimum
    size, selecting examples based on informativeness (variance across prompts).
    Examples are divided into anchors (always evaluated) and exploration pool
    (selected based on informativeness).

    Attributes:
        level: Preset level (NONE, LOW, MODERATE, HIGH). Default: LOW.
            NONE disables adaptive pooling. Higher levels use smaller pools and
            more aggressive annealing for greater cost savings.
        anchor_size: Number of anchor examples that are always evaluated.
            Default: 30. Anchors provide stable baseline for optimization.
            Must be <= pool_min_size.
        pool_init_size: Initial pool size at start of optimization.
            Default: None (uses all available examples). Set to limit initial pool.
            Must be >= pool_min_size if both are set.
        pool_min_size: Target minimum pool size after annealing completes.
            Default: None (uses anchor_size). Pool anneals linearly from
            pool_init_size to pool_min_size between warmup_iters and anneal_stop_iter.
            Must be >= anchor_size.
        warmup_iters: Number of iterations before starting pool annealing.
            Default: 5. During warmup, pool stays at pool_init_size to gather
            informativeness data.
        anneal_stop_iter: Iteration at which pool reaches pool_min_size.
            Default: 20. Pool size decreases linearly from warmup_iters to this.
            Must be > warmup_iters.
        pool_update_period: Update informativeness scores every N generations.
            Default: 3. More frequent updates (lower value) adapt faster but
            require more computation.
        min_evals_per_example: Minimum evaluations per example before computing
            informativeness. Default: 3. Examples with fewer evals get info=0.0.
        k_info_prompts: Number of top-performing prompts used for informativeness
            computation. Default: 10. Only scores from these prompts are used to
            compute variance-based informativeness.
        info_buffer_factor: Buffer factor (0.0-1.0) for preserving informativeness
            during pool reduction. Default: 0.9. Higher values preserve more
            informativeness but allow less reduction. Lower values allow more
            aggressive reduction but may lose informativeness.
        info_epsilon: Small epsilon value added to prevent division by zero in
            informativeness calculations. Default: 1e-6.
        anchor_selection_method: Method for selecting anchor examples.
            Default: "clustering". Options:
            - "random": Random selection
            - "clustering": Select diverse examples via clustering
        exploration_strategy: Strategy for selecting exploration pool examples.
            Default: "diversity". Options:
            - "random": Random selection
            - "diversity": Select diverse examples based on informativeness
        heatup_reserve_pool: Optional list of seed IDs reserved for heat-up phase.
            Default: None. If provided, these seeds are added back to pool during
            heat-up phases to prevent overfitting to small pool.
        heatup_trigger: When to trigger heat-up phase (adding seeds back to pool).
            Default: "after_min_size". Options:
            - "after_min_size": Trigger after pool reaches min_size
            - "immediate": Trigger immediately
            - "every_N_trials_after_min": Trigger periodically after min_size
        heatup_size: Number of seeds to add during heat-up phase.
            Default: 20. Seeds are selected from heatup_reserve_pool or reserve pool.
        heatup_cooldown_trials: Number of trials to wait before cooling down
            (removing heat-up seeds) after heat-up. Default: 50.
        heatup_schedule: Whether heat-up repeats or happens once.
            Default: "repeat". Options:
            - "once": Heat-up happens once
            - "repeat": Heat-up repeats after cooldown
    """
    # NOTE(review): field defaults below match the docstring; per-level presets
    # (e.g. _ADAPTIVE_POOL_DEFAULTS) appear to override these elsewhere — confirm.
    level: AdaptiveCurriculumLevel = AdaptiveCurriculumLevel.LOW
    anchor_size: int = 30
    pool_init_size: int | None = None
    pool_min_size: int | None = None
    warmup_iters: int = 5
    anneal_stop_iter: int = 20
    pool_update_period: int = 3
    min_evals_per_example: int = 3
    k_info_prompts: int = 10
    info_buffer_factor: float = 0.9
    info_epsilon: float = 1e-6
    anchor_selection_method: Literal["random", "clustering"] = "clustering"
    exploration_strategy: Literal["random", "diversity"] = "diversity"
    heatup_reserve_pool: list[int] | None = None
    heatup_trigger: Literal["after_min_size", "immediate", "every_N_trials_after_min"] = "after_min_size"
    heatup_size: int = 20
    heatup_cooldown_trials: int = 50
    heatup_schedule: Literal["repeat", "once"] = "repeat"

    @property
    def enabled(self) -> bool:
        """Whether adaptive pooling is enabled (level != NONE)."""
        return self.level != AdaptiveCurriculumLevel.NONE
|
|
370
|
+
|
|
371
|
+
|
|
372
|
+
class AdaptiveBatchLevel(str, Enum):
    """Preset intensity levels for adaptive batch curriculum (GEPA only)."""

    NONE = "NONE"  # adaptive batching disabled
    LOW = "LOW"
    MODERATE = "MODERATE"
    HIGH = "HIGH"
|
|
378
|
+
|
|
379
|
+
|
|
380
|
+
class GEPAAdaptiveBatchConfig(ExtraModel):
    """Configuration for adaptive batch evaluation (GEPA only).

    Reduces evaluation costs by using smaller minibatches and subsampling validation.
    """
    # Preset intensity; NONE disables adaptive batching (see ``enabled``).
    level: AdaptiveBatchLevel = AdaptiveBatchLevel.MODERATE
    reflection_minibatch_size: int = 3  # Train examples per reflection step
    min_local_improvement: float = 0.0  # Threshold for accepting proposals
    val_evaluation_mode: Literal["full", "subsample"] = "subsample"  # Validation mode
    val_subsample_size: int = 64  # Subsample size when mode="subsample"
    # How candidates are picked; "coverage" vs plain "random" selection.
    candidate_selection_strategy: Literal["coverage", "random"] = "coverage"

    @property
    def enabled(self) -> bool:
        """Whether adaptive batch is enabled (level != NONE)."""
        return self.level != AdaptiveBatchLevel.NONE
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
# Default presets for adaptive pool (mirrors monorepo structure).
# Keyed by AdaptiveCurriculumLevel; consumed by resolve_adaptive_pool_config().
_ADAPTIVE_POOL_DEFAULTS: dict[AdaptiveCurriculumLevel, dict[str, Any]] = {
    # NONE: adaptivity effectively disabled — sentinel 999_999 pushes warmup /
    # annealing / updates past any realistic run length.
    AdaptiveCurriculumLevel.NONE: {
        "anchor_size": 0,
        "pool_init_size": None,
        "pool_min_size": None,
        "warmup_iters": 999_999,
        "anneal_stop_iter": 999_999,
        "pool_update_period": 999_999,
        "min_evals_per_example": 1,
        "k_info_prompts": 0,
        "info_buffer_factor": 1.0,
        "info_epsilon": 1e-6,
        "anchor_selection_method": "random",
        "exploration_strategy": "random",
        "heatup_reserve_pool": None,
        "heatup_trigger": "after_min_size",
        "heatup_size": 20,
        "heatup_cooldown_trials": 50,
        "heatup_schedule": "repeat",
    },
    # LOW: gentle adaptivity — large pool, long warmup, frequent updates.
    AdaptiveCurriculumLevel.LOW: {
        "anchor_size": 50,
        "pool_init_size": 150,
        "pool_min_size": 100,
        "warmup_iters": 10,
        "anneal_stop_iter": 30,
        "pool_update_period": 2,
        "min_evals_per_example": 5,
        "k_info_prompts": 15,
        "info_buffer_factor": 0.95,
        "info_epsilon": 1e-6,
        "anchor_selection_method": "clustering",
        "exploration_strategy": "diversity",
        "heatup_reserve_pool": None,
        "heatup_trigger": "after_min_size",
        "heatup_size": 20,
        "heatup_cooldown_trials": 50,
        "heatup_schedule": "repeat",
    },
    # MODERATE: the default preset (matches AdaptivePoolConfig field defaults).
    AdaptiveCurriculumLevel.MODERATE: {
        "anchor_size": 30,
        "pool_init_size": 100,
        "pool_min_size": 50,
        "warmup_iters": 5,
        "anneal_stop_iter": 20,
        "pool_update_period": 3,
        "min_evals_per_example": 3,
        "k_info_prompts": 10,
        "info_buffer_factor": 0.9,
        "info_epsilon": 1e-6,
        "anchor_selection_method": "clustering",
        "exploration_strategy": "diversity",
        "heatup_reserve_pool": None,
        "heatup_trigger": "after_min_size",
        "heatup_size": 20,
        "heatup_cooldown_trials": 50,
        "heatup_schedule": "repeat",
    },
    # HIGH: most aggressive pruning — small pool, short warmup, fewest evals.
    AdaptiveCurriculumLevel.HIGH: {
        "anchor_size": 20,
        "pool_init_size": 60,
        "pool_min_size": 30,
        "warmup_iters": 3,
        "anneal_stop_iter": 10,
        "pool_update_period": 5,
        "min_evals_per_example": 2,
        "k_info_prompts": 5,
        "info_buffer_factor": 0.8,
        "info_epsilon": 1e-6,
        "anchor_selection_method": "clustering",
        "exploration_strategy": "diversity",
        "heatup_reserve_pool": None,
        "heatup_trigger": "after_min_size",
        "heatup_size": 20,
        "heatup_cooldown_trials": 50,
        "heatup_schedule": "repeat",
    },
}
|
|
477
|
+
|
|
478
|
+
# Default presets for adaptive batch (GEPA only).
# Keyed by AdaptiveBatchLevel; consumed by resolve_adaptive_batch_config().
_ADAPTIVE_BATCH_DEFAULTS: dict[AdaptiveBatchLevel, dict[str, Any]] = {
    # NONE: full validation, largest minibatches — no cost reduction.
    AdaptiveBatchLevel.NONE: {
        "reflection_minibatch_size": 8,
        "min_local_improvement": 0.0,
        "val_evaluation_mode": "full",
        "val_subsample_size": 64,
        "candidate_selection_strategy": "random",
    },
    AdaptiveBatchLevel.LOW: {
        "reflection_minibatch_size": 5,
        "min_local_improvement": 0.0,
        "val_evaluation_mode": "subsample",
        "val_subsample_size": 80,
        "candidate_selection_strategy": "coverage",
    },
    # MODERATE: matches GEPAAdaptiveBatchConfig field defaults.
    AdaptiveBatchLevel.MODERATE: {
        "reflection_minibatch_size": 3,
        "min_local_improvement": 0.0,
        "val_evaluation_mode": "subsample",
        "val_subsample_size": 64,
        "candidate_selection_strategy": "coverage",
    },
    # HIGH: smallest minibatch and subsample — cheapest, least faithful.
    AdaptiveBatchLevel.HIGH: {
        "reflection_minibatch_size": 2,
        "min_local_improvement": 0.0,
        "val_evaluation_mode": "subsample",
        "val_subsample_size": 48,
        "candidate_selection_strategy": "coverage",
    },
}
|
|
509
|
+
|
|
510
|
+
|
|
511
|
+
def _coerce_pool_choice(value: Any, allowed: tuple[str, ...], fallback: str) -> str:
    """Return *value* when it is one of *allowed*; otherwise *fallback*.

    Sanitizes free-form override values into the Literal choices accepted by
    AdaptivePoolConfig, replacing the repeated inline validation ternaries.
    """
    return value if value in allowed else fallback


def resolve_adaptive_pool_config(
    *,
    level: AdaptiveCurriculumLevel | str | None = None,
    overrides: dict[str, Any] | None = None,
    dev_pool_size: int | None = None,
) -> AdaptivePoolConfig:
    """Resolve adaptive pool config from level preset and overrides.

    Args:
        level: Preset level (NONE, LOW, MODERATE, HIGH). Defaults to LOW if None.
        overrides: Dict of field overrides to apply on top of level defaults.
        dev_pool_size: Optional dev pool size to cap pool_init_size if needed.

    Returns:
        AdaptivePoolConfig with resolved values.

    Raises:
        ValueError: If *level* is a string that does not name a valid preset.
    """
    # Normalize level: accept enum members, names (case-insensitive), or None.
    if level is None:
        level = AdaptiveCurriculumLevel.LOW
    elif isinstance(level, str):
        try:
            level = AdaptiveCurriculumLevel[level.strip().upper()]
        except KeyError:
            valid_levels = ", ".join(level_item.name for level_item in AdaptiveCurriculumLevel)
            raise ValueError(f"Invalid adaptive pool level '{level}'. Must be one of: {valid_levels}") from None

    # Start from the preset defaults for this level, then layer user overrides.
    defaults = _ADAPTIVE_POOL_DEFAULTS[level].copy()
    if overrides:
        defaults.update(overrides)

    # Handle pool_init_size and pool_min_size with dev_pool_size:
    # unset sizes fall back to the dev pool size when one is provided.
    pool_init_size = defaults.get("pool_init_size")
    pool_min_size = defaults.get("pool_min_size")
    if pool_init_size is None:
        pool_init_size = dev_pool_size
    if pool_min_size is None:
        pool_min_size = dev_pool_size

    # Never initialize the pool larger than the available dev pool.
    if dev_pool_size is not None and pool_init_size is not None and pool_init_size > dev_pool_size:
        pool_init_size = dev_pool_size

    # heatup_reserve_pool may arrive as a list/tuple, a single value, or None.
    # (list, tuple) tuple form also works on Python < 3.10, unlike `list | tuple`.
    heatup_reserve = defaults.get("heatup_reserve_pool")
    if heatup_reserve is not None and not isinstance(heatup_reserve, (list, tuple)):
        # NOTE: a falsy scalar (e.g. 0) is treated as "no reserve pool".
        heatup_reserve = [heatup_reserve] if heatup_reserve else None

    # Look up once; _coerce_pool_choice validates against the allowed Literals.
    heatup_trigger = defaults.get("heatup_trigger", "after_min_size")
    heatup_schedule = defaults.get("heatup_schedule", "repeat")

    # Create config with proper types.
    config = AdaptivePoolConfig(
        level=level,
        anchor_size=int(defaults["anchor_size"]),
        pool_init_size=None if pool_init_size is None else int(pool_init_size),
        pool_min_size=None if pool_min_size is None else int(pool_min_size),
        warmup_iters=int(defaults["warmup_iters"]),
        anneal_stop_iter=int(defaults["anneal_stop_iter"]),
        pool_update_period=int(defaults["pool_update_period"]),
        min_evals_per_example=int(defaults["min_evals_per_example"]),
        k_info_prompts=int(defaults["k_info_prompts"]),
        info_buffer_factor=float(defaults["info_buffer_factor"]),
        info_epsilon=float(defaults["info_epsilon"]),
        anchor_selection_method=_coerce_pool_choice(
            defaults["anchor_selection_method"], ("random", "clustering"), "clustering"
        ),
        exploration_strategy=_coerce_pool_choice(
            defaults["exploration_strategy"], ("random", "diversity"), "diversity"
        ),
        heatup_reserve_pool=list(heatup_reserve) if heatup_reserve else None,
        heatup_trigger=_coerce_pool_choice(
            heatup_trigger,
            ("after_min_size", "immediate", "every_N_trials_after_min"),
            "after_min_size",
        ),
        heatup_size=int(defaults.get("heatup_size", 20)),
        heatup_cooldown_trials=int(defaults.get("heatup_cooldown_trials", 50)),
        heatup_schedule=_coerce_pool_choice(heatup_schedule, ("repeat", "once"), "repeat"),
    )

    return config
|
|
586
|
+
|
|
587
|
+
|
|
588
|
+
def _coerce_batch_choice(value: Any, allowed: tuple[str, ...], fallback: str) -> str:
    """Return *value* when it is one of *allowed*; otherwise *fallback*.

    Sanitizes free-form override values into the Literal choices accepted by
    GEPAAdaptiveBatchConfig, replacing the repeated inline validation ternaries.
    """
    return value if value in allowed else fallback


def resolve_adaptive_batch_config(
    *,
    level: AdaptiveBatchLevel | str | None = None,
    overrides: dict[str, Any] | None = None,
) -> GEPAAdaptiveBatchConfig:
    """Resolve adaptive batch config from level preset and overrides.

    Args:
        level: Preset level (NONE, LOW, MODERATE, HIGH). Defaults to MODERATE if None.
        overrides: Dict of field overrides to apply on top of level defaults.

    Returns:
        GEPAAdaptiveBatchConfig with resolved values.

    Raises:
        ValueError: If *level* is a string that does not name a valid preset.
    """
    # Normalize level: accept enum members, names (case-insensitive), or None.
    if level is None:
        level = AdaptiveBatchLevel.MODERATE
    elif isinstance(level, str):
        try:
            level = AdaptiveBatchLevel[level.strip().upper()]
        except KeyError:
            valid_levels = ", ".join(level_item.name for level_item in AdaptiveBatchLevel)
            raise ValueError(f"Invalid adaptive batch level '{level}'. Must be one of: {valid_levels}") from None

    # Start from the preset defaults for this level, then layer user overrides.
    defaults = _ADAPTIVE_BATCH_DEFAULTS[level].copy()
    if overrides:
        defaults.update(overrides)

    # Create config with proper types; _coerce_batch_choice validates Literals.
    return GEPAAdaptiveBatchConfig(
        level=level,
        reflection_minibatch_size=int(defaults["reflection_minibatch_size"]),
        min_local_improvement=float(defaults["min_local_improvement"]),
        val_evaluation_mode=_coerce_batch_choice(
            defaults["val_evaluation_mode"], ("full", "subsample"), "full"
        ),
        val_subsample_size=int(defaults["val_subsample_size"]),
        candidate_selection_strategy=_coerce_batch_choice(
            defaults["candidate_selection_strategy"], ("coverage", "random"), "coverage"
        ),
    )
|
|
628
|
+
|
|
629
|
+
|
|
630
|
+
class MIPROConfig(ExtraModel):
    """MIPRO-specific configuration.

    MIPROv2 uses meta-learning with bootstrap phase, TPE optimization, and mini-batch evaluation
    to efficiently optimize prompts with fewer evaluations than genetic algorithms.

    Attributes:
        proposer_effort: Effort level for proposer model selection. Controls which model
            is used for generating prompt proposals. Default: "LOW".
            Options:
            - "LOW_CONTEXT": Uses gpt-oss-120b (Groq) with minimal context. Fastest/cheapest.
              Required when proposer_output_tokens="RAPID".
            - "LOW": Uses smaller/faster models (e.g., gpt-4o-mini). Good balance.
            - "MEDIUM": Uses medium models (e.g., gpt-4o). Higher quality proposals.
            - "HIGH": Uses best models (e.g., gpt-5). Highest quality but expensive.
        proposer_output_tokens: Maximum output tokens allowed for proposer model.
            Default: "FAST". Controls proposal length and cost.
            Options:
            - "RAPID": 3000 tokens max. Fastest/cheapest. Requires proposer_effort="LOW_CONTEXT"
              and gpt-oss-120b model. Use for short, focused proposals.
            - "FAST": 10000 tokens max. Good balance. Works with any effort level.
            - "SLOW": 25000 tokens max. Allows longer proposals. Use for complex prompts.
        min_bootstrap_demos: Minimum number of qualified bootstrap demonstrations required.
            Default: None (no minimum). If set, bootstrap phase will fail early if fewer than
            this many demos pass the few_shot_score_threshold. Use with strict_bootstrap=True
            for fail-fast behavior.
        strict_bootstrap: If True, fail immediately when bootstrap doesn't produce enough
            qualified demos (< min_bootstrap_demos). Default: False. When False, optimization
            continues but may produce suboptimal results with insufficient demos.
    """
    # Task app connection
    task_app_url: str | None = None  # Base URL of the task app serving rollouts
    task_app_api_key: str | None = None  # API key for the task app
    task_app_id: str | None = None  # Identifier of the task app (defaults to env_name in simple())
    # Optimization loop sizing
    num_iterations: int = 20  # Optimization iterations
    num_evaluations_per_iteration: int = 5  # Evaluations per iteration
    batch_size: int = 32  # Mini-batch size for evaluation
    max_concurrent: int = 20  # Maximum concurrent rollouts
    # Environment
    env_name: str = "banking77"  # Task/environment name
    env_config: dict[str, Any] | None = None  # Extra environment configuration
    few_shot_score_threshold: float = 0.8  # Minimum score for a demo to qualify as few-shot
    results_file: str | None = None  # Optional path to write results
    max_wall_clock_seconds: float | None = None  # Wall-clock budget
    max_total_tokens: int | None = None  # Total token budget
    policy_config: dict[str, Any] | None = None  # Policy model settings
    meta: MIPROMetaConfig | dict[str, Any] | None = None  # Meta-model settings (deprecated fields rejected below)
    modules: list[MIPROModuleConfig] | list[dict[str, Any]] | None = None  # Pipeline modules/stages
    seeds: MIPROSeedConfig | dict[str, Any] | None = None  # Structured seed pools

    # Proposer configuration
    proposer_effort: Literal["LOW_CONTEXT", "LOW", "MEDIUM", "HIGH"] = "LOW"
    proposer_output_tokens: Literal["RAPID", "FAST", "SLOW"] = "FAST"

    # Token and budget configuration (mirrors GEPA pattern)
    max_token_limit: int | None = None  # Total tokens across all rollouts (policy + proposer)
    max_spend_usd: float | None = None  # Maximum spend in USD
    token_counting_model: str = "gpt-4"  # Model for token estimation (tiktoken)
    enforce_token_limit: bool = True  # Halt optimization if limit exceeded

    # TPE configuration
    tpe: dict[str, Any] | None = None

    # Demo configuration
    demo: dict[str, Any] | None = None

    # Grounding configuration
    grounding: dict[str, Any] | None = None

    # Meta-update configuration
    meta_update: dict[str, Any] | None = None

    # Judge configuration (shared with GEPA)
    judge: PromptLearningJudgeConfig | dict[str, Any] | None = None

    # Proxy models configuration (optional, can also be at top-level)
    proxy_models: ProxyModelsConfig | dict[str, Any] | None = None

    # Adaptive pool configuration (optional)
    adaptive_pool: AdaptivePoolConfig | dict[str, Any] | None = None

    # System spec configuration
    spec_path: str | None = None  # Path to system spec JSON file
    spec_max_tokens: int = 5000  # Max tokens for spec context in meta-prompt
    spec_include_examples: bool = True  # Include examples from spec
    spec_priority_threshold: int | None = None  # Only include rules with priority >= threshold
    # Custom metaprompt (optional)
    metaprompt: str | None = None  # Custom metaprompt text to include in instruction generation prompts

    # Bootstrap seeds (for few-shot examples)
    bootstrap_train_seeds: list[int] | None = None

    # Online pool (for mini-batch evaluation)
    online_pool: list[int] | None = None

    # Test pool (held-out seeds)
    test_pool: list[int] | None = None

    # Reference pool (for dataset context in meta-prompt, must not overlap with train/test)
    reference_pool: list[int] | None = None

    # Strict bootstrap mode: minimum qualified demos required
    # If fewer demos qualify (score >= few_shot_score_threshold), job fails early with clear error
    # Default: 0 (no minimum - current behavior for backwards compatibility)
    min_bootstrap_demos: int = 0

    @model_validator(mode="before")
    @classmethod
    def _forbid_meta_model_config(cls, data: dict[str, Any]) -> dict[str, Any]:
        """Forbid deprecated meta_model configuration fields.

        Meta-model selection is now controlled by proposer_effort and proposer_output_tokens.
        The backend automatically selects the model based on these settings.
        """
        # Non-dict payloads (already-built models) pass through untouched.
        if not isinstance(data, dict):
            return data

        # Deprecated flat field name -> user-facing migration message.
        deprecated_meta_fields = {
            "meta_model": "Meta-model selection is now controlled by 'proposer_effort' (LOW_CONTEXT, LOW, MEDIUM, HIGH). Remove 'meta_model' from your config.",
            "meta_model_provider": "Meta-model provider is now controlled by 'proposer_effort'. Remove 'meta_model_provider' from your config.",
            "meta_model_inference_url": "Meta-model inference URL is now controlled by 'proposer_effort'. Remove 'meta_model_inference_url' from your config.",
            "meta_model_temperature": "Meta-model temperature is now controlled by 'proposer_effort'. Remove 'meta_model_temperature' from your config.",
            "meta_model_max_tokens": "Meta-model max_tokens is now controlled by 'proposer_effort' and 'proposer_output_tokens'. Remove 'meta_model_max_tokens' from your config.",
        }

        for field, message in deprecated_meta_fields.items():
            if field in data and data[field] is not None:
                raise ValueError(f"Deprecated field '{field}': {message}")

        # Also check in nested meta section
        if "meta" in data and isinstance(data["meta"], dict):
            meta_data = data["meta"]
            if meta_data.get("model") is not None:
                raise ValueError("Deprecated field 'meta.model': Meta-model selection is now controlled by 'proposer_effort'. Remove [prompt_learning.mipro.meta] section.")
            if meta_data.get("provider") is not None:
                raise ValueError("Deprecated field 'meta.provider': Meta-model provider is now controlled by 'proposer_effort'. Remove [prompt_learning.mipro.meta] section.")

        return data

    @field_validator("bootstrap_train_seeds", "online_pool", "test_pool", "reference_pool", mode="before")
    @classmethod
    def _parse_mipro_seed_lists(cls, v: Any) -> list[int] | None:
        """Parse MIPRO seed lists that can be either a list or range dict."""
        return _parse_seeds(v)

    @classmethod
    def simple(
        cls,
        *,
        task_app_url: str,
        task_app_api_key: str,
        env_name: str,
        rollout_budget: int,
        initial_prompt_messages: Sequence[Mapping[str, Any]] | Sequence[Any],
        task_app_id: str | None = None,
        bootstrap_seeds: list[int] | None = None,
        online_seeds: list[int] | None = None,
        test_seeds: list[int] | None = None,
        reference_pool: list[int] | None = None,
        env_config: dict[str, Any] | None = None,
        num_iterations: int | None = None,
        num_evaluations_per_iteration: int | None = None,
        batch_size: int | None = None,
        max_concurrent: int | None = None,
        meta_preset: Literal["fast", "balanced", "high_quality"] = "balanced",
        policy_model: str = "openai/gpt-oss-20b",
        policy_provider: str = "groq",
        policy_temperature: float = 1.0,
        policy_max_completion_tokens: int = 512,
        policy_name: str | None = None,
        meta_model: str | None = None,
        meta_provider: str | None = None,
        meta_inference_url: str | None = None,
    ) -> MIPROConfig:
        """Convenience constructor for single-stage MIPRO tasks.

        Automatically infers reasonable defaults for seeds, iterations, and module layout
        based on the rollout budget. This keeps simple benchmarks (e.g., Iris) readable
        while leaving the full constructor available for complex multi-stage pipelines.
        """
        if rollout_budget <= 0:
            raise ValueError("rollout_budget must be positive for MIPROConfig.simple()")
        normalized_messages = _normalize_messages(initial_prompt_messages)
        if not normalized_messages:
            raise ValueError("initial_prompt_messages must contain at least one message")

        # Explicit seed lists win; otherwise derive pools from the rollout budget.
        bootstrap = bootstrap_seeds or _auto_calculate_bootstrap_seeds(rollout_budget)
        online = online_seeds or _auto_calculate_online_seeds(rollout_budget)
        tests = test_seeds or []
        reference = reference_pool or _auto_calculate_reference_pool(rollout_budget)

        iterations = num_iterations or _auto_calculate_iterations(rollout_budget)
        evals_per_iteration = (
            num_evaluations_per_iteration
            or _auto_calculate_evaluations_per_iteration(rollout_budget)
        )
        # Batch size is capped by both the online pool size and 32.
        derived_batch_size = batch_size or max(1, min(len(online), 32))
        derived_max_concurrent = max_concurrent or 10

        baseline_instruction = _extract_baseline_instruction(normalized_messages)
        meta_config = _create_meta_config_from_preset(meta_preset)
        # Explicit meta-model overrides take precedence over the preset.
        if meta_model:
            meta_config.model = meta_model
        if meta_provider:
            meta_config.provider = meta_provider
        if meta_inference_url is not None:
            meta_config.inference_url = meta_inference_url

        # Single module with a single stage built from the provided messages.
        stage = MIPROStageConfig(
            stage_id="default_stage_0",
            baseline_instruction=baseline_instruction,
            baseline_messages=normalized_messages,
        )
        module = MIPROModuleConfig(
            module_id="default",
            stages=[stage],
        )
        seeds = MIPROSeedConfig(
            bootstrap=bootstrap,
            online=online,
            test=tests,
            reference=reference,
        )
        policy_config = {
            "model": policy_model,
            "provider": policy_provider,
            "temperature": policy_temperature,
            "max_completion_tokens": policy_max_completion_tokens,
        }
        if policy_name:
            policy_config["policy_name"] = policy_name

        return cls(
            task_app_url=task_app_url,
            task_app_api_key=task_app_api_key,
            task_app_id=task_app_id or env_name,
            env_name=env_name,
            env_config=env_config,
            seeds=seeds,
            num_iterations=iterations,
            num_evaluations_per_iteration=evals_per_iteration,
            batch_size=derived_batch_size,
            max_concurrent=derived_max_concurrent,
            policy_config=policy_config,
            meta=meta_config,
            modules=[module],
        )
|
|
875
|
+
|
|
876
|
+
|
|
877
|
+
def _auto_calculate_bootstrap_seeds(rollout_budget: int) -> list[int]:
|
|
878
|
+
"""Auto-calculate bootstrap seeds from rollout budget."""
|
|
879
|
+
count = max(3, min(10, max(rollout_budget // 10, 1)))
|
|
880
|
+
return list(range(count))
|
|
881
|
+
|
|
882
|
+
|
|
883
|
+
def _auto_calculate_online_seeds(rollout_budget: int) -> list[int]:
|
|
884
|
+
"""Auto-calculate online pool seeds from rollout budget."""
|
|
885
|
+
count = max(5, min(50, max(rollout_budget // 3, 1)))
|
|
886
|
+
return list(range(10, 10 + count))
|
|
887
|
+
|
|
888
|
+
|
|
889
|
+
def _auto_calculate_reference_pool(rollout_budget: int) -> list[int]:
|
|
890
|
+
"""Auto-calculate reference pool seeds from rollout budget."""
|
|
891
|
+
count = max(5, min(30, max(rollout_budget // 5, 1)))
|
|
892
|
+
return list(range(20, 20 + count))
|
|
893
|
+
|
|
894
|
+
|
|
895
|
+
def _auto_calculate_iterations(rollout_budget: int) -> int:
|
|
896
|
+
"""Auto-calculate number of optimization iterations."""
|
|
897
|
+
online_pool_size = max(5, min(50, max(rollout_budget // 3, 1)))
|
|
898
|
+
evals_per_iteration = max(3, min(10, max(rollout_budget // max(online_pool_size * 2, 1), 1)))
|
|
899
|
+
iterations = max(5, min(20, max(rollout_budget // max(online_pool_size * evals_per_iteration, 1), 1)))
|
|
900
|
+
return iterations
|
|
901
|
+
|
|
902
|
+
|
|
903
|
+
def _auto_calculate_evaluations_per_iteration(rollout_budget: int) -> int:
|
|
904
|
+
"""Auto-calculate number of evaluations per iteration."""
|
|
905
|
+
online_pool_size = max(5, min(50, max(rollout_budget // 3, 1)))
|
|
906
|
+
iterations = max(5, min(20, max(rollout_budget // max(online_pool_size * 5, 1), 1)))
|
|
907
|
+
evals_per_iteration = max(3, min(10, max(rollout_budget // max(online_pool_size * iterations, 1), 1)))
|
|
908
|
+
return evals_per_iteration
|
|
909
|
+
|
|
910
|
+
|
|
911
|
+
def _coerce_message_mapping(message: Mapping[str, Any] | Any) -> dict[str, Any]:
|
|
912
|
+
"""Convert message objects or dicts into a mutable dict."""
|
|
913
|
+
if isinstance(message, Mapping):
|
|
914
|
+
return dict(message)
|
|
915
|
+
if hasattr(message, "model_dump"):
|
|
916
|
+
try:
|
|
917
|
+
data = message.model_dump()
|
|
918
|
+
if isinstance(data, dict):
|
|
919
|
+
return data
|
|
920
|
+
except Exception: # pragma: no cover - defensive
|
|
921
|
+
pass
|
|
922
|
+
if hasattr(message, "__dict__"):
|
|
923
|
+
try:
|
|
924
|
+
return {
|
|
925
|
+
key: value
|
|
926
|
+
for key, value in vars(message).items()
|
|
927
|
+
if not key.startswith("_")
|
|
928
|
+
}
|
|
929
|
+
except Exception: # pragma: no cover - defensive
|
|
930
|
+
return {}
|
|
931
|
+
return {}
|
|
932
|
+
|
|
933
|
+
|
|
934
|
+
def _extract_baseline_instruction(messages: Sequence[Mapping[str, str]] | Sequence[Any]) -> str:
    """Extract the baseline instruction string from message templates."""
    # Prefer the first non-empty system message, then the first non-empty
    # user message; fall back to a generic instruction.
    for wanted_role in ("system", "user"):
        for raw in messages:
            entry = _coerce_message_mapping(raw)
            if entry.get("role", "user") != wanted_role:
                continue
            text = (entry.get("content") or entry.get("pattern") or "").strip()
            if text:
                return text
    return "Complete the task."
|
|
949
|
+
|
|
950
|
+
|
|
951
|
+
def _normalize_messages(messages: Sequence[Mapping[str, str]] | Sequence[Any]) -> list[dict[str, str]]:
    """Normalize message dictionaries so downstream tools can rely on `content`."""
    result: list[dict[str, str]] = []
    for raw in messages:
        entry = _coerce_message_mapping(raw)
        # Missing/empty role defaults to "user"; `pattern` is an accepted
        # legacy alias for `content`.
        role = entry.get("role", "user") or "user"
        body = entry.get("content") or entry.get("pattern") or ""
        result.append({"role": str(role), "content": str(body)})
    return result
|
|
960
|
+
|
|
961
|
+
|
|
962
|
+
def _create_meta_config_from_preset(preset: str) -> MIPROMetaConfig:
    """Create a meta config preset (fast/balanced/high_quality)."""
    key = preset.lower().strip()
    # (model, temperature, max_tokens) per preset; every preset uses the
    # OpenAI provider with no explicit inference URL.
    settings: dict[str, tuple[str, float, int]] = {
        "fast": ("gpt-4o-mini", 0.7, 512),
        "balanced": ("gpt-4o-mini", 0.8, 1024),
        "high_quality": ("gpt-4o", 0.9, 2048),
    }
    # Unknown preset names fall back to "balanced".
    model, temperature, max_tokens = settings.get(key, settings["balanced"])
    return MIPROMetaConfig(
        model=model,
        provider="openai",
        temperature=temperature,
        max_tokens=max_tokens,
        inference_url=None,
    )
|
|
989
|
+
|
|
990
|
+
|
|
991
|
+
# GEPA nested configs (mirroring RL structure)
class GEPARolloutConfig(ExtraModel):
    """GEPA rollout configuration (mirrors RL [rollout] section)."""
    budget: int | None = None  # Total rollout budget (None = unlimited)
    max_concurrent: int = 20  # Maximum concurrent rollouts
    minibatch_size: int = 8  # Minibatch size for evaluation
|
|
997
|
+
|
|
998
|
+
|
|
999
|
+
class GEPAEvaluationConfig(ExtraModel):
    """GEPA evaluation configuration (mirrors RL [evaluation] section)."""
    seeds: list[int] | None = None  # Evaluation seeds (training set)
    validation_seeds: list[int] | None = None  # Validation seeds (held-out)
    test_pool: list[int] | None = None  # Test pool (final evaluation)
    validation_pool: str | None = None  # Pool name for validation (e.g., "validation")
    validation_top_k: int | None = None  # Top-K prompts to validate

    @field_validator("seeds", "validation_seeds", "test_pool", mode="before")
    @classmethod
    def _parse_seed_lists(cls, v: Any) -> list[int] | None:
        """Parse seed lists that can be either a list or range dict."""
        # Delegates to the shared module-level helper for both formats.
        return _parse_seeds(v)
|
|
1012
|
+
|
|
1013
|
+
|
|
1014
|
+
class GEPAMutationConfig(ExtraModel):
    """GEPA mutation configuration.

    NOTE: Mutation model selection is controlled by proposer_effort, NOT llm_model.
    The llm_model/llm_provider fields are deprecated and should not be used.
    """
    rate: float = 0.3  # Mutation rate
    llm_model: str | None = None  # DEPRECATED: Use proposer_effort instead
    llm_provider: str | None = None  # DEPRECATED: Use proposer_effort instead
    llm_inference_url: str | None = None  # DEPRECATED: Not used
    prompt: str | None = None  # Custom mutation prompt

    @model_validator(mode="before")
    @classmethod
    def _forbid_mutation_llm_config(cls, data: dict[str, Any]) -> dict[str, Any]:
        """Forbid deprecated mutation LLM configuration fields.

        Mutation model selection is now controlled by proposer_effort at the gepa level.
        """
        # Non-dict payloads (already-built models) pass through untouched.
        if not isinstance(data, dict):
            return data

        # Deprecated field name -> user-facing migration message.
        deprecated_mutation_fields = {
            "llm_model": "Mutation model selection is now controlled by 'proposer_effort' (LOW_CONTEXT, LOW, MEDIUM, HIGH) at [prompt_learning.gepa] level. Remove 'llm_model' from [prompt_learning.gepa.mutation].",
            "llm_provider": "Mutation provider is now controlled by 'proposer_effort'. Remove 'llm_provider' from [prompt_learning.gepa.mutation].",
            "llm_inference_url": "Mutation inference URL is not used. Remove 'llm_inference_url' from [prompt_learning.gepa.mutation].",
        }

        for field, message in deprecated_mutation_fields.items():
            if field in data and data[field] is not None:
                raise ValueError(f"Deprecated field '{field}': {message}")

        return data
|
|
1047
|
+
|
|
1048
|
+
|
|
1049
|
+
class GEPAPopulationConfig(ExtraModel):
    """GEPA population configuration (evolution parameters)."""
    initial_size: int = 20  # Initial population size
    num_generations: int = 10  # Number of generations
    children_per_generation: int = 5  # Children generated per generation
    crossover_rate: float = 0.5  # Crossover rate
    selection_pressure: float = 1.0  # Pareto selection pressure
    patience_generations: int = 3  # Early stopping patience
|
|
1059
|
+
class GEPAArchiveConfig(ExtraModel):
    """GEPA archive configuration (Pareto archive settings)."""
    size: int = 64  # Archive size
    pareto_set_size: int = 64  # Pareto set size
    pareto_eps: float = 1e-6  # Pareto epsilon
    feedback_fraction: float = 0.5  # Fraction of archive for feedback
|
|
1067
|
+
class GEPATokenConfig(ExtraModel):
    """GEPA token and budget configuration."""
    max_limit: int | None = None  # Maximum tokens allowed in prompt
    counting_model: str = "gpt-4"  # Model for token counting
    enforce_pattern_limit: bool = True  # Enforce token limit on patterns
    max_spend_usd: float | None = None  # Maximum spend in USD
|
|
1075
|
+
class GEPAModuleConfig(ExtraModel):
    """Configuration for a single GEPA pipeline module/stage (instruction-only).

    Each module MUST have its own policy configuration. The policy field is required
    and must include 'model' and 'provider' fields.
    """
    module_id: str
    max_instruction_slots: int = 3
    allowed_tools: list[str] | None = None
    max_tokens: int | None = None
    policy: PromptLearningPolicyConfig | dict[str, Any] = Field(
        ...,
        description="Required per-module policy configuration. Must include 'model' and 'provider' fields."
    )

    @field_validator("module_id")
    @classmethod
    def _validate_module_id(cls, v: str) -> str:
        """Strip surrounding whitespace and reject empty module ids."""
        v = v.strip()
        if not v:
            raise ValueError("module_id cannot be empty")
        return v

    @field_validator("max_instruction_slots")
    @classmethod
    def _validate_slots(cls, v: int) -> int:
        """Require at least one instruction slot."""
        if v < 1:
            raise ValueError("max_instruction_slots must be >= 1")
        return v

    @field_validator("policy", mode="before")
    @classmethod
    def _validate_policy(cls, v: Any) -> Any:
        """Validate that policy is a dict with required fields.

        Returns the input unchanged on success; may return either a dict or an
        already-constructed PromptLearningPolicyConfig (hence the Any return —
        the previous dict[str, Any] annotation was inaccurate for that path).

        Raises:
            ValueError: if policy is missing, or a dict without 'model'/'provider'.
        """
        if v is None:
            raise ValueError("policy is required for each module/stage")
        if isinstance(v, dict):
            # Falsy values ("" / None) count as missing, not just absent keys.
            if not v.get("model"):
                raise ValueError("policy must include 'model' field")
            if not v.get("provider"):
                raise ValueError("policy must include 'provider' field")
            return v
        # If it's already a PromptLearningPolicyConfig, it will be validated by Pydantic
        return v
|
|
1121
|
+
class GEPAConfig(ExtraModel):
    """GEPA-specific configuration with nested subsections.

    GEPA (Genetic Evolution of Prompt Architectures) uses evolutionary algorithms
    with LLM-guided mutations to optimize prompts through population-based search.

    Attributes:
        proposer_type: Type of proposer to use for generating mutations.
            Default: "dspy". Options: "dspy" (DSPy-style proposer) or "spec" (spec-based).
        proposer_effort: Effort level for proposer model selection. Controls which model
            is used for generating prompt mutations. Default: "LOW".
            Options:
            - "LOW_CONTEXT": Uses gpt-oss-120b (Groq) with minimal context. Fastest/cheapest.
              Required when proposer_output_tokens="RAPID".
            - "LOW": Uses smaller/faster models (e.g., gpt-4o-mini). Good balance.
            - "MEDIUM": Uses medium models (e.g., gpt-4o). Higher quality mutations.
            - "HIGH": Uses best models (e.g., gpt-5). Highest quality but expensive.
        proposer_output_tokens: Maximum output tokens allowed for proposer model.
            Default: "FAST". Controls mutation length and cost.
            Options:
            - "RAPID": 3000 tokens max. Fastest/cheapest. Requires proposer_effort="LOW_CONTEXT"
              and gpt-oss-120b model. Use for short, focused mutations.
            - "FAST": 10000 tokens max. Good balance. Works with any effort level.
            - "SLOW": 25000 tokens max. Allows longer mutations. Use for complex prompts.
        metaprompt: Optional custom metaprompt text to include in mutation prompts.
            Default: None. If provided, replaces default metaprompt template.
    """
    # Top-level fields (for backwards compatibility)
    env_name: str = "banking77"
    env_config: dict[str, Any] | None = None
    rng_seed: int | None = None
    proposer_type: str = "dspy"
    proposer_effort: Literal["LOW_CONTEXT", "LOW", "MEDIUM", "HIGH"] = "LOW"
    proposer_output_tokens: Literal["RAPID", "FAST", "SLOW"] = "FAST"
    # Custom metaprompt (optional)
    metaprompt: str | None = None

    # Multi-stage pipeline support
    modules: list[GEPAModuleConfig] | None = None

    # Nested subsections (preferred, mirrors RL structure)
    rollout: GEPARolloutConfig | None = None
    evaluation: GEPAEvaluationConfig | None = None
    mutation: GEPAMutationConfig | None = None
    population: GEPAPopulationConfig | None = None
    archive: GEPAArchiveConfig | None = None
    token: GEPATokenConfig | None = None
    judge: PromptLearningJudgeConfig | dict[str, Any] | None = None
    proxy_models: ProxyModelsConfig | dict[str, Any] | None = None  # Proxy models config (can be at top-level or gepa-specific)
    adaptive_pool: AdaptivePoolConfig | dict[str, Any] | None = None  # Adaptive pooling config
    adaptive_batch: GEPAAdaptiveBatchConfig | dict[str, Any] | None = None  # Adaptive batch config (GEPA only)

    # Backwards compatibility: flat fields (DEPRECATED - DO NOT USE)
    # These are kept for backwards compatibility with _get_* methods but should not be used directly.
    # NOTE: _check_flat_format_deprecated below rejects any non-None value for these,
    # so at runtime they are effectively always None and the _get_* fallbacks never fire.
    rollout_budget: int | None = None
    max_concurrent_rollouts: int | None = None
    minibatch_size: int | None = None
    evaluation_seeds: list[int] | None = None
    validation_seeds: list[int] | None = None
    test_pool: list[int] | None = None
    validation_pool: str | None = None
    validation_top_k: int | None = None
    mutation_rate: float | None = None
    mutation_llm_model: str | None = None
    mutation_llm_provider: str | None = None
    mutation_llm_inference_url: str | None = None
    mutation_prompt: str | None = None
    initial_population_size: int | None = None
    num_generations: int | None = None
    children_per_generation: int | None = None
    crossover_rate: float | None = None
    selection_pressure: float | None = None
    patience_generations: int | None = None
    archive_size: int | None = None
    pareto_set_size: int | None = None
    pareto_eps: float | None = None
    feedback_fraction: float | None = None
    max_token_limit: int | None = None
    token_counting_model: str | None = None
    enforce_pattern_token_limit: bool | None = None
    max_spend_usd: float | None = None

    @model_validator(mode="before")
    @classmethod
    def _check_flat_format_deprecated(cls, data: dict[str, Any]) -> dict[str, Any]:
        """Forbid deprecated flat GEPA format fields.

        Users must use nested format:
        - gepa.rollout.budget instead of gepa.rollout_budget
        - gepa.evaluation.seeds instead of gepa.evaluation_seeds
        - etc.

        Raises:
            ValueError: if any flat field is present with a non-None value.
        """
        if not isinstance(data, dict):
            return data

        # Maps each forbidden flat key to a user-facing migration hint.
        flat_fields_map = {
            "rollout_budget": "Use [prompt_learning.gepa.rollout] section with 'budget' field instead.",
            "max_concurrent_rollouts": "Use [prompt_learning.gepa.rollout] section with 'max_concurrent' field instead.",
            "minibatch_size": "Use [prompt_learning.gepa.rollout] section with 'minibatch_size' field instead.",
            "evaluation_seeds": "Use [prompt_learning.gepa.evaluation] section with 'seeds' field instead.",
            "validation_seeds": "Use [prompt_learning.gepa.evaluation] section with 'validation_seeds' field instead.",
            "test_pool": "Use [prompt_learning.gepa.evaluation] section with 'test_pool' field instead.",
            "validation_pool": "Use [prompt_learning.gepa.evaluation] section with 'validation_pool' field instead.",
            "validation_top_k": "Use [prompt_learning.gepa.evaluation] section with 'validation_top_k' field instead.",
            "mutation_rate": "Use [prompt_learning.gepa.mutation] section with 'rate' field instead.",
            "mutation_llm_model": "Use [prompt_learning.gepa.mutation] section with 'llm_model' field instead.",
            "mutation_llm_provider": "Use [prompt_learning.gepa.mutation] section with 'llm_provider' field instead.",
            "mutation_llm_inference_url": "Use [prompt_learning.gepa.mutation] section with 'llm_inference_url' field instead.",
            "mutation_prompt": "Use [prompt_learning.gepa.mutation] section with 'prompt' field instead.",
            "initial_population_size": "Use [prompt_learning.gepa.population] section with 'initial_size' field instead.",
            "num_generations": "Use [prompt_learning.gepa.population] section with 'num_generations' field instead.",
            "children_per_generation": "Use [prompt_learning.gepa.population] section with 'children_per_generation' field instead.",
            "crossover_rate": "Use [prompt_learning.gepa.population] section with 'crossover_rate' field instead.",
            "selection_pressure": "Use [prompt_learning.gepa.population] section with 'selection_pressure' field instead.",
            "patience_generations": "Use [prompt_learning.gepa.population] section with 'patience_generations' field instead.",
            "archive_size": "Use [prompt_learning.gepa.archive] section with 'size' field instead.",
            "pareto_set_size": "Use [prompt_learning.gepa.archive] section with 'pareto_set_size' field instead.",
            "pareto_eps": "Use [prompt_learning.gepa.archive] section with 'pareto_eps' field instead.",
            "feedback_fraction": "Use [prompt_learning.gepa.archive] section with 'feedback_fraction' field instead.",
            "max_token_limit": "Use [prompt_learning.gepa.token] section with 'max_limit' field instead.",
            "token_counting_model": "Use [prompt_learning.gepa.token] section with 'counting_model' field instead.",
            "enforce_pattern_token_limit": "Use [prompt_learning.gepa.token] section with 'enforce_pattern_limit' field instead.",
            "max_spend_usd": "Use [prompt_learning.gepa.token] section with 'max_spend_usd' field instead.",
        }

        for field, message in flat_fields_map.items():
            if field in data and data[field] is not None:
                raise ValueError(f"Deprecated flat GEPA format field '{field}': {message}")

        return data

    def _get_rollout_budget(self) -> int | None:
        """Get rollout budget from nested or flat structure."""
        if self.rollout and self.rollout.budget is not None:
            return self.rollout.budget
        return self.rollout_budget

    def _get_max_concurrent_rollouts(self) -> int:
        """Get max concurrent rollouts from nested or flat structure."""
        if self.rollout and self.rollout.max_concurrent is not None:
            return self.rollout.max_concurrent
        return self.max_concurrent_rollouts or 20

    def _get_minibatch_size(self) -> int:
        """Get minibatch size from nested or flat structure."""
        if self.rollout and self.rollout.minibatch_size is not None:
            return self.rollout.minibatch_size
        return self.minibatch_size or 8

    def _get_evaluation_seeds(self) -> list[int] | None:
        """Get evaluation seeds from nested or flat structure."""
        if self.evaluation and self.evaluation.seeds is not None:
            return self.evaluation.seeds
        return self.evaluation_seeds

    def _get_validation_seeds(self) -> list[int] | None:
        """Get validation seeds from nested or flat structure."""
        if self.evaluation and self.evaluation.validation_seeds is not None:
            return self.evaluation.validation_seeds
        return self.validation_seeds

    def _get_test_pool(self) -> list[int] | None:
        """Get test pool from nested or flat structure."""
        if self.evaluation and self.evaluation.test_pool is not None:
            return self.evaluation.test_pool
        return self.test_pool

    def _get_mutation_rate(self) -> float:
        """Get mutation rate from nested or flat structure."""
        if self.mutation and self.mutation.rate is not None:
            return self.mutation.rate
        return self.mutation_rate or 0.3

    def _get_mutation_llm_model(self) -> str | None:
        """Get mutation LLM model from nested or flat structure."""
        if self.mutation and self.mutation.llm_model is not None:
            return self.mutation.llm_model
        return self.mutation_llm_model

    def _get_mutation_llm_provider(self) -> str:
        """Get mutation LLM provider from nested or flat structure."""
        if self.mutation and self.mutation.llm_provider is not None:
            return self.mutation.llm_provider
        return self.mutation_llm_provider or "groq"

    def _get_mutation_llm_inference_url(self) -> str | None:
        """Get mutation LLM inference URL from nested or flat structure."""
        if self.mutation and self.mutation.llm_inference_url is not None:
            return self.mutation.llm_inference_url
        return self.mutation_llm_inference_url

    def _get_mutation_prompt(self) -> str | None:
        """Get mutation prompt from nested or flat structure."""
        if self.mutation and self.mutation.prompt is not None:
            return self.mutation.prompt
        return self.mutation_prompt

    def _get_initial_population_size(self) -> int:
        """Get initial population size from nested or flat structure."""
        if self.population and self.population.initial_size is not None:
            return self.population.initial_size
        return self.initial_population_size or 20

    def _get_num_generations(self) -> int:
        """Get num generations from nested or flat structure."""
        if self.population and self.population.num_generations is not None:
            return self.population.num_generations
        return self.num_generations or 10

    def _get_children_per_generation(self) -> int:
        """Get children per generation from nested or flat structure."""
        if self.population and self.population.children_per_generation is not None:
            return self.population.children_per_generation
        return self.children_per_generation or 5

    def _get_crossover_rate(self) -> float:
        """Get crossover rate from nested or flat structure."""
        if self.population and self.population.crossover_rate is not None:
            return self.population.crossover_rate
        return self.crossover_rate or 0.5

    def _get_selection_pressure(self) -> float:
        """Get selection pressure from nested or flat structure."""
        if self.population and self.population.selection_pressure is not None:
            return self.population.selection_pressure
        return self.selection_pressure or 1.0

    def _get_patience_generations(self) -> int:
        """Get patience generations from nested or flat structure."""
        if self.population and self.population.patience_generations is not None:
            return self.population.patience_generations
        return self.patience_generations or 3

    def _get_archive_size(self) -> int:
        """Get archive size from nested or flat structure."""
        if self.archive and self.archive.size is not None:
            return self.archive.size
        return self.archive_size or 64

    def _get_pareto_set_size(self) -> int:
        """Get pareto set size from nested or flat structure."""
        if self.archive and self.archive.pareto_set_size is not None:
            return self.archive.pareto_set_size
        return self.pareto_set_size or 64

    def _get_pareto_eps(self) -> float:
        """Get pareto eps from nested or flat structure."""
        if self.archive and self.archive.pareto_eps is not None:
            return self.archive.pareto_eps
        return self.pareto_eps or 1e-6

    def _get_feedback_fraction(self) -> float:
        """Get feedback fraction from nested or flat structure."""
        if self.archive and self.archive.feedback_fraction is not None:
            return self.archive.feedback_fraction
        return self.feedback_fraction or 0.5

    def _get_max_token_limit(self) -> int | None:
        """Get max token limit from nested or flat structure."""
        if self.token and self.token.max_limit is not None:
            return self.token.max_limit
        return self.max_token_limit

    def _get_token_counting_model(self) -> str:
        """Get token counting model from nested or flat structure."""
        if self.token and self.token.counting_model is not None:
            return self.token.counting_model
        return self.token_counting_model or "gpt-4"

    def _get_enforce_pattern_token_limit(self) -> bool:
        """Get enforce pattern token limit from nested or flat structure."""
        if self.token and self.token.enforce_pattern_limit is not None:
            return self.token.enforce_pattern_limit
        # Explicit None check so a flat False is honored (bool is falsy).
        return self.enforce_pattern_token_limit if self.enforce_pattern_token_limit is not None else True

    def _get_max_spend_usd(self) -> float | None:
        """Get max spend USD from nested or flat structure."""
        if self.token and self.token.max_spend_usd is not None:
            return self.token.max_spend_usd
        return self.max_spend_usd

    @classmethod
    def from_mapping(cls, data: Mapping[str, Any]) -> GEPAConfig:
        """Load GEPA config from dict/TOML, handling both nested and flat structures."""
        # Check for nested structure first: split keys into sub-section keys
        # (validated into their own models below) and everything else.
        nested_data = {}
        flat_data = {}

        for key, value in data.items():
            if key in ("rollout", "evaluation", "mutation", "population", "archive", "token", "modules", "proxy_models", "adaptive_pool", "adaptive_batch", "judge"):
                nested_data[key] = value
            else:
                flat_data[key] = value

        # If we have nested data, create nested configs
        if nested_data:
            if "rollout" in nested_data:
                nested_data["rollout"] = GEPARolloutConfig.model_validate(nested_data["rollout"])
            if "evaluation" in nested_data:
                nested_data["evaluation"] = GEPAEvaluationConfig.model_validate(nested_data["evaluation"])
            if "mutation" in nested_data:
                nested_data["mutation"] = GEPAMutationConfig.model_validate(nested_data["mutation"])
            if "population" in nested_data:
                nested_data["population"] = GEPAPopulationConfig.model_validate(nested_data["population"])
            if "archive" in nested_data:
                nested_data["archive"] = GEPAArchiveConfig.model_validate(nested_data["archive"])
            if "token" in nested_data:
                nested_data["token"] = GEPATokenConfig.model_validate(nested_data["token"])
            if "modules" in nested_data:
                modules_data = nested_data["modules"]
                if isinstance(modules_data, list):
                    nested_data["modules"] = [
                        GEPAModuleConfig.model_validate(m) if isinstance(m, dict) else m
                        for m in modules_data
                    ]
            # Handle proxy_models in gepa config (only if specified, defaults to None)
            if "proxy_models" in nested_data and isinstance(nested_data["proxy_models"], dict):
                nested_data["proxy_models"] = ProxyModelsConfig.model_validate(nested_data["proxy_models"])
            # If proxy_models not specified, leave as None (defaults to disabled)

            # Handle adaptive_pool in gepa config (only if specified, defaults to None)
            if "adaptive_pool" in nested_data and isinstance(nested_data["adaptive_pool"], dict):
                # Resolve adaptive pool config with level and overrides
                adaptive_pool_data = nested_data["adaptive_pool"]
                level = adaptive_pool_data.get("level")
                # If level not specified, default to LOW (conservative SDK default)
                overrides = {k: v for k, v in adaptive_pool_data.items() if k != "level"}
                # Get dev_pool_size from evaluation.seeds if available
                dev_pool_size = None
                if "evaluation" in nested_data:
                    eval_config = nested_data["evaluation"]
                    # Handle both dict and Pydantic model (GEPAEvaluationConfig)
                    if isinstance(eval_config, dict):
                        eval_seeds = eval_config.get("seeds")
                    else:
                        # Pydantic model - use attribute access
                        eval_seeds = getattr(eval_config, "seeds", None)
                    if isinstance(eval_seeds, list):
                        dev_pool_size = len(eval_seeds)
                nested_data["adaptive_pool"] = resolve_adaptive_pool_config(
                    level=level,  # Will default to LOW if None (via resolve_adaptive_pool_config)
                    overrides=overrides if overrides else None,
                    dev_pool_size=dev_pool_size,
                )
            # If adaptive_pool not specified, leave as None (defaults to disabled)
            if "adaptive_batch" in nested_data and isinstance(nested_data["adaptive_batch"], dict):
                # Resolve adaptive batch config with level and overrides
                adaptive_batch_data = nested_data["adaptive_batch"]
                level = adaptive_batch_data.get("level")
                overrides = {k: v for k, v in adaptive_batch_data.items() if k != "level"}
                try:
                    nested_data["adaptive_batch"] = resolve_adaptive_batch_config(
                        level=level,
                        overrides=overrides if overrides else None,
                    )
                except Exception as exc:
                    # Re-raise with clearer context
                    raise ValueError(f"Failed to resolve adaptive_batch config: {exc}") from exc

        # Merge nested and flat data (nested wins on key collision)
        merged_data = {**flat_data, **nested_data}
        return cls.model_validate(merged_data)
|
|
1485
|
+
class PromptLearningConfig(ExtraModel):
    """Top-level prompt learning configuration."""
    algorithm: str  # "mipro" or "gepa"
    task_app_url: str
    task_app_api_key: str | None = None
    task_app_id: str | None = None
    initial_prompt: PromptPatternConfig | None = None
    policy: PromptLearningPolicyConfig | None = None
    mipro: MIPROConfig | None = None
    gepa: GEPAConfig | None = None
    judge: PromptLearningJudgeConfig | dict[str, Any] | None = None
    proxy_models: ProxyModelsConfig | dict[str, Any] | None = None  # Proxy models config (can be at top-level or algorithm-specific)
    env_config: dict[str, Any] | None = None

    # Free tier configuration
    free_tier: bool = Field(
        default=False,
        description=(
            "Enable free tier mode. Uses cost-effective OSS models for policy and proposer. "
            "Requires proposer_effort='LOW' or 'MEDIUM' (not 'HIGH'). "
            "Counts against your org's free tier limits. When limits are exceeded, "
            "remove this flag to run as paid job."
        ),
    )

    @model_validator(mode="before")
    @classmethod
    def _validate_free_tier_config(cls, data: dict[str, Any]) -> dict[str, Any]:
        """Validate that free tier jobs use eligible proposer_effort levels."""
        if not isinstance(data, dict):
            return data

        # Check if free tier is enabled (accepts common string truthy spellings)
        free_tier = data.get("free_tier", False)
        if isinstance(free_tier, str):
            free_tier = free_tier.lower() in ("true", "1", "yes", "on")
        if not free_tier:
            return data

        # Get proposer_effort from GEPA or MIPRO config (GEPA takes precedence)
        proposer_effort = None
        gepa = data.get("gepa", {})
        if isinstance(gepa, dict):
            proposer_effort = gepa.get("proposer_effort")
        if proposer_effort is None:
            mipro = data.get("mipro", {})
            if isinstance(mipro, dict):
                proposer_effort = mipro.get("proposer_effort")

        # Default to "LOW" if not specified (which is free tier eligible)
        if proposer_effort is None:
            proposer_effort = "LOW"

        # Validate proposer_effort is eligible for free tier
        free_tier_efforts = {"LOW_CONTEXT", "LOW", "MEDIUM"}
        effort_upper = proposer_effort.upper() if isinstance(proposer_effort, str) else str(proposer_effort).upper()
        if effort_upper not in free_tier_efforts:
            # NOTE(review): the suggestion below omits 'LOW_CONTEXT', which is
            # also in free_tier_efforts — confirm whether the message is intentional.
            raise ValueError(
                f"Free tier requires proposer_effort to be one of: {', '.join(sorted(free_tier_efforts))}. "
                f"Got: '{proposer_effort}'. "
                f"Either change proposer_effort to 'LOW' or 'MEDIUM', or remove 'free_tier = true' from your config."
            )

        return data

    @model_validator(mode="before")
    @classmethod
    def _check_deprecated_fields(cls, data: dict[str, Any]) -> dict[str, Any]:
        """Remove deprecated fields that are no longer used.

        These fields are silently removed to maintain backwards compatibility
        with older configs while the CLI validation module warns about them.
        """
        if not isinstance(data, dict):
            return data

        # Silently remove deprecated fields (don't raise errors)
        deprecated_fields = {"display", "results_folder", "env_file_path"}

        for field in deprecated_fields:
            if field in data:
                data.pop(field, None)

        return data

    def to_dict(self) -> dict[str, Any]:
        """Convert config to dictionary for API payload."""
        result = self.model_dump(mode="python", exclude_none=True)
        # Ensure prompt_learning section wraps everything
        if "prompt_learning" not in result:
            pl_data = dict(result.items())
            result = {"prompt_learning": pl_data}
        return result

    @classmethod
    def from_mapping(cls, data: Mapping[str, Any]) -> PromptLearningConfig:
        """Load prompt learning config from dict/TOML mapping."""
        # Remove deprecated fields at top level (silently for backwards compatibility)
        # The CLI validation module will warn about these
        deprecated_top_level = {"display", "results_folder", "env_file_path"}

        # Convert to mutable dict if needed
        if not isinstance(data, dict):
            data = dict(data)
        else:
            data = dict(data)  # Create a copy to avoid modifying the original

        for field in deprecated_top_level:
            if field in data:
                data.pop(field, None)

        # Handle both [prompt_learning] section and flat structure
        pl_data = data.get("prompt_learning", {})
        if not pl_data:
            # If no prompt_learning section, assume top-level is prompt_learning
            pl_data = dict(data)

        # Handle proxy_models at top-level FIRST (takes precedence over algorithm-specific)
        # This ensures top-level proxy_models is available for algorithm configs to check
        # Default: None (proxy models disabled unless explicitly configured)
        top_level_proxy_models = None
        if "proxy_models" in pl_data and isinstance(pl_data["proxy_models"], dict):
            top_level_proxy_models = ProxyModelsConfig.model_validate(pl_data["proxy_models"])
            pl_data["proxy_models"] = top_level_proxy_models
        # If proxy_models not specified, leave as None (defaults to disabled)

        # Handle gepa config specially to support nested structure
        if "gepa" in pl_data and isinstance(pl_data["gepa"], dict):
            gepa_data = pl_data["gepa"]
            # If top-level proxy_models exists, remove gepa-specific proxy_models (top-level takes precedence)
            if top_level_proxy_models is not None and "proxy_models" in gepa_data:
                gepa_data.pop("proxy_models")
            pl_data["gepa"] = GEPAConfig.from_mapping(gepa_data)
            # Ensure gepa config uses top-level proxy_models if available
            if top_level_proxy_models is not None:
                # Note: gepa.proxy_models will be None, but top-level proxy_models will be used by backend
                pass

        # Handle mipro config - check for adaptive_pool
        if "mipro" in pl_data and isinstance(pl_data["mipro"], dict):
            mipro_data = pl_data["mipro"]
            # If top-level proxy_models exists, remove mipro-specific proxy_models (top-level takes precedence)
            if top_level_proxy_models is not None and "proxy_models" in mipro_data:
                mipro_data.pop("proxy_models")

            # Extract bootstrap_train_seeds and online_pool from top-level pl_data if not in mipro_data
            # These fields can be at top-level [prompt_learning] or nested [prompt_learning.mipro]
            if "bootstrap_train_seeds" not in mipro_data and "bootstrap_train_seeds" in pl_data:
                mipro_data["bootstrap_train_seeds"] = pl_data["bootstrap_train_seeds"]
            if "online_pool" not in mipro_data and "online_pool" in pl_data:
                mipro_data["online_pool"] = pl_data["online_pool"]
            if "test_pool" not in mipro_data and "test_pool" in pl_data:
                mipro_data["test_pool"] = pl_data["test_pool"]
            if "reference_pool" not in mipro_data and "reference_pool" in pl_data:
                mipro_data["reference_pool"] = pl_data["reference_pool"]

            # Handle adaptive_pool in mipro config (only if specified, defaults to None)
            if "adaptive_pool" in mipro_data and isinstance(mipro_data["adaptive_pool"], dict):
                adaptive_pool_data = mipro_data["adaptive_pool"]
                level = adaptive_pool_data.get("level")
                # If level not specified, default to LOW (conservative SDK default)
                overrides = {k: v for k, v in adaptive_pool_data.items() if k != "level"}
                # Get dev_pool_size from online_pool if available
                dev_pool_size = None
                online_pool = mipro_data.get("online_pool") or (mipro_data.get("seeds") or {}).get("online", [])
                if isinstance(online_pool, list):
                    dev_pool_size = len(online_pool)
                try:
                    mipro_data["adaptive_pool"] = resolve_adaptive_pool_config(
                        level=level,  # Will default to LOW if None (via resolve_adaptive_pool_config)
                        overrides=overrides if overrides else None,
                        dev_pool_size=dev_pool_size,
                    )
                except Exception as exc:
                    # Re-raise with clearer context
                    raise ValueError(f"Failed to resolve mipro.adaptive_pool config: {exc}") from exc
            # If adaptive_pool not specified, leave as None (defaults to disabled)

            # Handle proxy_models in mipro config (only if specified, defaults to None)
            if "proxy_models" in mipro_data and isinstance(mipro_data["proxy_models"], dict):
                mipro_data["proxy_models"] = ProxyModelsConfig.model_validate(mipro_data["proxy_models"])
            # If proxy_models not specified, leave as None (defaults to disabled)

        if "judge" in pl_data and isinstance(pl_data["judge"], dict):
            pl_data["judge"] = PromptLearningJudgeConfig.model_validate(pl_data["judge"])

        return cls.model_validate(pl_data)

    @classmethod
    def from_path(cls, path: Path) -> PromptLearningConfig:
        """Load prompt learning config from TOML file."""
        content = load_toml(path)
        return cls.from_mapping(content)
|
|
1680
|
+
__all__ = [
|
|
1681
|
+
"GEPAConfig",
|
|
1682
|
+
"GEPAModuleConfig",
|
|
1683
|
+
"GEPARolloutConfig",
|
|
1684
|
+
"GEPAEvaluationConfig",
|
|
1685
|
+
"GEPAMutationConfig",
|
|
1686
|
+
"GEPAPopulationConfig",
|
|
1687
|
+
"GEPAArchiveConfig",
|
|
1688
|
+
"GEPATokenConfig",
|
|
1689
|
+
"GEPAAdaptiveBatchConfig",
|
|
1690
|
+
"MIPROConfig",
|
|
1691
|
+
"MIPROMetaConfig",
|
|
1692
|
+
"MIPROModuleConfig",
|
|
1693
|
+
"MIPROStageConfig",
|
|
1694
|
+
"MIPROSeedConfig",
|
|
1695
|
+
"MessagePatternConfig",
|
|
1696
|
+
"PromptLearningConfig",
|
|
1697
|
+
"PromptLearningPolicyConfig",
|
|
1698
|
+
"PromptPatternConfig",
|
|
1699
|
+
"PromptLearningJudgeConfig",
|
|
1700
|
+
"ProxyModelsConfig",
|
|
1701
|
+
"AdaptivePoolConfig",
|
|
1702
|
+
"AdaptiveCurriculumLevel",
|
|
1703
|
+
"AdaptiveBatchLevel",
|
|
1704
|
+
"resolve_adaptive_pool_config",
|
|
1705
|
+
"resolve_adaptive_batch_config",
|
|
1706
|
+
]
|