synth-ai 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff shows the changes between publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the packages as they appear in that registry.

Potentially problematic release: this version of synth-ai might be problematic.

Files changed (153)
  1. synth_ai/__init__.py +13 -13
  2. synth_ai/cli/__init__.py +6 -15
  3. synth_ai/cli/commands/eval/__init__.py +6 -15
  4. synth_ai/cli/commands/eval/config.py +338 -0
  5. synth_ai/cli/commands/eval/core.py +236 -1091
  6. synth_ai/cli/commands/eval/runner.py +704 -0
  7. synth_ai/cli/commands/eval/validation.py +44 -117
  8. synth_ai/cli/commands/filter/core.py +7 -7
  9. synth_ai/cli/commands/filter/validation.py +2 -2
  10. synth_ai/cli/commands/smoke/core.py +7 -17
  11. synth_ai/cli/commands/status/__init__.py +1 -64
  12. synth_ai/cli/commands/status/client.py +50 -151
  13. synth_ai/cli/commands/status/config.py +3 -83
  14. synth_ai/cli/commands/status/errors.py +4 -13
  15. synth_ai/cli/commands/status/subcommands/__init__.py +2 -8
  16. synth_ai/cli/commands/status/subcommands/config.py +13 -0
  17. synth_ai/cli/commands/status/subcommands/files.py +18 -63
  18. synth_ai/cli/commands/status/subcommands/jobs.py +28 -311
  19. synth_ai/cli/commands/status/subcommands/models.py +18 -62
  20. synth_ai/cli/commands/status/subcommands/runs.py +16 -63
  21. synth_ai/cli/commands/status/subcommands/session.py +67 -172
  22. synth_ai/cli/commands/status/subcommands/summary.py +24 -32
  23. synth_ai/cli/commands/status/subcommands/utils.py +41 -0
  24. synth_ai/cli/commands/status/utils.py +16 -107
  25. synth_ai/cli/commands/train/__init__.py +18 -20
  26. synth_ai/cli/commands/train/errors.py +3 -3
  27. synth_ai/cli/commands/train/prompt_learning_validation.py +15 -16
  28. synth_ai/cli/commands/train/validation.py +7 -7
  29. synth_ai/cli/commands/train/{judge_schemas.py → verifier_schemas.py} +33 -34
  30. synth_ai/cli/commands/train/verifier_validation.py +235 -0
  31. synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +0 -1
  32. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +2 -6
  33. synth_ai/cli/demo_apps/math/config.toml +0 -1
  34. synth_ai/cli/demo_apps/math/modal_task_app.py +2 -6
  35. synth_ai/cli/demo_apps/mipro/task_app.py +25 -47
  36. synth_ai/cli/lib/apps/task_app.py +12 -13
  37. synth_ai/cli/lib/task_app_discovery.py +6 -6
  38. synth_ai/cli/lib/train_cfgs.py +10 -10
  39. synth_ai/cli/task_apps/__init__.py +11 -0
  40. synth_ai/cli/task_apps/commands.py +7 -15
  41. synth_ai/core/env.py +12 -1
  42. synth_ai/core/errors.py +1 -2
  43. synth_ai/core/integrations/cloudflare.py +209 -33
  44. synth_ai/core/tracing_v3/abstractions.py +46 -0
  45. synth_ai/data/__init__.py +3 -30
  46. synth_ai/data/enums.py +1 -20
  47. synth_ai/data/rewards.py +100 -3
  48. synth_ai/products/graph_evolve/__init__.py +1 -2
  49. synth_ai/products/graph_evolve/config.py +16 -16
  50. synth_ai/products/graph_evolve/converters/__init__.py +3 -3
  51. synth_ai/products/graph_evolve/converters/openai_sft.py +7 -7
  52. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +1 -1
  53. synth_ai/products/graph_gepa/__init__.py +23 -0
  54. synth_ai/products/graph_gepa/converters/__init__.py +19 -0
  55. synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
  56. synth_ai/sdk/__init__.py +45 -35
  57. synth_ai/sdk/api/eval/__init__.py +33 -0
  58. synth_ai/sdk/api/eval/job.py +732 -0
  59. synth_ai/sdk/api/research_agent/__init__.py +276 -66
  60. synth_ai/sdk/api/train/builders.py +181 -0
  61. synth_ai/sdk/api/train/cli.py +41 -33
  62. synth_ai/sdk/api/train/configs/__init__.py +6 -4
  63. synth_ai/sdk/api/train/configs/prompt_learning.py +127 -33
  64. synth_ai/sdk/api/train/configs/rl.py +264 -16
  65. synth_ai/sdk/api/train/configs/sft.py +165 -1
  66. synth_ai/sdk/api/train/graph_validators.py +12 -12
  67. synth_ai/sdk/api/train/graphgen.py +169 -51
  68. synth_ai/sdk/api/train/graphgen_models.py +95 -45
  69. synth_ai/sdk/api/train/local_api.py +10 -0
  70. synth_ai/sdk/api/train/pollers.py +36 -0
  71. synth_ai/sdk/api/train/prompt_learning.py +390 -60
  72. synth_ai/sdk/api/train/rl.py +41 -5
  73. synth_ai/sdk/api/train/sft.py +2 -0
  74. synth_ai/sdk/api/train/task_app.py +20 -0
  75. synth_ai/sdk/api/train/validators.py +17 -17
  76. synth_ai/sdk/graphs/completions.py +239 -33
  77. synth_ai/sdk/{judging/schemas.py → graphs/verifier_schemas.py} +23 -23
  78. synth_ai/sdk/learning/__init__.py +35 -5
  79. synth_ai/sdk/learning/context_learning_client.py +531 -0
  80. synth_ai/sdk/learning/context_learning_types.py +294 -0
  81. synth_ai/sdk/learning/prompt_learning_client.py +1 -1
  82. synth_ai/sdk/learning/prompt_learning_types.py +2 -1
  83. synth_ai/sdk/learning/rl/__init__.py +0 -4
  84. synth_ai/sdk/learning/rl/contracts.py +0 -4
  85. synth_ai/sdk/localapi/__init__.py +40 -0
  86. synth_ai/sdk/localapi/apps/__init__.py +28 -0
  87. synth_ai/sdk/localapi/client.py +10 -0
  88. synth_ai/sdk/localapi/contracts.py +10 -0
  89. synth_ai/sdk/localapi/helpers.py +519 -0
  90. synth_ai/sdk/localapi/rollouts.py +93 -0
  91. synth_ai/sdk/localapi/server.py +29 -0
  92. synth_ai/sdk/localapi/template.py +49 -0
  93. synth_ai/sdk/streaming/handlers.py +6 -6
  94. synth_ai/sdk/streaming/streamer.py +10 -6
  95. synth_ai/sdk/task/__init__.py +18 -5
  96. synth_ai/sdk/task/apps/__init__.py +37 -1
  97. synth_ai/sdk/task/client.py +9 -1
  98. synth_ai/sdk/task/config.py +6 -11
  99. synth_ai/sdk/task/contracts.py +137 -95
  100. synth_ai/sdk/task/in_process.py +32 -22
  101. synth_ai/sdk/task/in_process_runner.py +9 -4
  102. synth_ai/sdk/task/rubrics/__init__.py +2 -3
  103. synth_ai/sdk/task/rubrics/loaders.py +4 -4
  104. synth_ai/sdk/task/rubrics/strict.py +3 -4
  105. synth_ai/sdk/task/server.py +76 -16
  106. synth_ai/sdk/task/trace_correlation_helpers.py +190 -139
  107. synth_ai/sdk/task/validators.py +34 -49
  108. synth_ai/sdk/training/__init__.py +7 -16
  109. synth_ai/sdk/tunnels/__init__.py +118 -0
  110. synth_ai/sdk/tunnels/cleanup.py +83 -0
  111. synth_ai/sdk/tunnels/ports.py +120 -0
  112. synth_ai/sdk/tunnels/tunneled_api.py +363 -0
  113. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/METADATA +71 -4
  114. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/RECORD +118 -128
  115. synth_ai/cli/commands/baseline/__init__.py +0 -12
  116. synth_ai/cli/commands/baseline/core.py +0 -636
  117. synth_ai/cli/commands/baseline/list.py +0 -94
  118. synth_ai/cli/commands/eval/errors.py +0 -81
  119. synth_ai/cli/commands/status/formatters.py +0 -164
  120. synth_ai/cli/commands/status/subcommands/pricing.py +0 -23
  121. synth_ai/cli/commands/status/subcommands/usage.py +0 -203
  122. synth_ai/cli/commands/train/judge_validation.py +0 -305
  123. synth_ai/cli/usage.py +0 -159
  124. synth_ai/data/specs.py +0 -36
  125. synth_ai/sdk/api/research_agent/cli.py +0 -428
  126. synth_ai/sdk/api/research_agent/config.py +0 -357
  127. synth_ai/sdk/api/research_agent/job.py +0 -717
  128. synth_ai/sdk/baseline/__init__.py +0 -25
  129. synth_ai/sdk/baseline/config.py +0 -209
  130. synth_ai/sdk/baseline/discovery.py +0 -216
  131. synth_ai/sdk/baseline/execution.py +0 -154
  132. synth_ai/sdk/judging/__init__.py +0 -15
  133. synth_ai/sdk/judging/base.py +0 -24
  134. synth_ai/sdk/judging/client.py +0 -191
  135. synth_ai/sdk/judging/types.py +0 -42
  136. synth_ai/sdk/research_agent/__init__.py +0 -34
  137. synth_ai/sdk/research_agent/container_builder.py +0 -328
  138. synth_ai/sdk/research_agent/container_spec.py +0 -198
  139. synth_ai/sdk/research_agent/defaults.py +0 -34
  140. synth_ai/sdk/research_agent/results_collector.py +0 -69
  141. synth_ai/sdk/specs/__init__.py +0 -46
  142. synth_ai/sdk/specs/dataclasses.py +0 -149
  143. synth_ai/sdk/specs/loader.py +0 -144
  144. synth_ai/sdk/specs/serializer.py +0 -199
  145. synth_ai/sdk/specs/validation.py +0 -250
  146. synth_ai/sdk/tracing/__init__.py +0 -39
  147. synth_ai/sdk/usage/__init__.py +0 -37
  148. synth_ai/sdk/usage/client.py +0 -171
  149. synth_ai/sdk/usage/models.py +0 -261
  150. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
  151. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
  152. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
  153. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/top_level.txt +0 -0
synth_ai/__init__.py CHANGED
@@ -12,15 +12,15 @@ except Exception:
     # Silently fail if log filter can't be installed
     pass
 
-# Judge schemas moved to sdk/judging/schemas.py
-from synth_ai.sdk.judging.schemas import (
+# Verifier schemas live under sdk/graphs/verifier_schemas.py
+from synth_ai.sdk.graphs.verifier_schemas import (
     CriterionScorePayload,
-    JudgeOptions,
-    JudgeScoreRequest,
-    JudgeScoreResponse,
-    JudgeTaskApp,
-    JudgeTracePayload,
     ReviewPayload,
+    VerifierOptions,
+    VerifierScoreRequest,
+    VerifierScoreResponse,
+    VerifierTaskApp,
+    VerifierTracePayload,
 )
 
 try:  # Prefer the installed package metadata when available
@@ -45,12 +45,12 @@ EventPartitionElement = RewardSignal = SystemTrace = TrainingQuestion = None  #
 trace_event_async = trace_event_sync = upload = None  # type: ignore
 
 __all__ = [
-    # Judge API contracts
-    "JudgeScoreRequest",
-    "JudgeScoreResponse",
-    "JudgeOptions",
-    "JudgeTaskApp",
-    "JudgeTracePayload",
+    # Verifier API contracts
+    "VerifierScoreRequest",
+    "VerifierScoreResponse",
+    "VerifierOptions",
+    "VerifierTaskApp",
+    "VerifierTracePayload",
     "ReviewPayload",
     "CriterionScorePayload",
 ]  # Explicitly define public API (v1 tracing omitted in minimal env)
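
The hunks above are a mechanical Judge → Verifier rename of the re-exported scoring contracts, with the module moving from `sdk/judging/schemas.py` to `sdk/graphs/verifier_schemas.py`. A minimal migration sketch for downstream imports, based only on the names shown in this diff:

```python
# 0.4.1 import path (removed in this release):
# from synth_ai.sdk.judging.schemas import JudgeScoreRequest, JudgeScoreResponse

# 0.4.4 module and renamed contracts:
from synth_ai.sdk.graphs.verifier_schemas import (
    VerifierScoreRequest,
    VerifierScoreResponse,
)

# The same names are re-exported at the package root per the __all__ shown above:
from synth_ai import VerifierOptions, VerifierTaskApp
```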
synth_ai/cli/__init__.py CHANGED
@@ -1,8 +1,7 @@
 """CLI subcommands for Synth AI.
 
-This package hosts modular commands (watch, traces, recent, status)
-and exposes a top-level Click group named `cli` compatible with the
-pyproject entry point `synth_ai.cli:cli`.
+This package hosts modular commands and exposes a top-level Click group
+named `cli` compatible with the pyproject entry point `synth_ai.cli:cli`.
 """
 
 import importlib
@@ -11,9 +10,6 @@ from collections.abc import Callable
 from typing import Any
 
 from synth_ai.cli.agents import claude_cmd, codex_cmd, opencode_cmd
-from synth_ai.cli.commands.baseline import command as baseline_cmd
-from synth_ai.cli.commands.baseline.list import list_command as baseline_list_cmd
-from synth_ai.cli.commands.eval import command as eval_cmd
 from synth_ai.cli.demos.demo import demo_cmd
 from synth_ai.cli.deploy import deploy_cmd
 from synth_ai.cli.infra.mcp import mcp_cmd
@@ -21,7 +17,6 @@ from synth_ai.cli.infra.modal_app import modal_app_cmd
 from synth_ai.cli.infra.setup import setup_cmd
 from synth_ai.cli.task_apps import task_app_cmd
 from synth_ai.cli.training.train_cfg import train_cfg_cmd
-from synth_ai.cli.usage import usage_cmd
 
 # Load environment variables from a local .env if present (repo root)
 try:
@@ -67,24 +62,20 @@ cli = _cli_module.cli  # type: ignore[attr-defined]
 
 # Register core commands implemented as standalone modules
 
-cli.add_command(baseline_cmd, name="baseline")
-baseline_cmd.add_command(baseline_list_cmd, name="list")
 cli.add_command(claude_cmd, name="claude")
 cli.add_command(codex_cmd, name="codex")
 cli.add_command(demo_cmd, name="demo")
 cli.add_command(deploy_cmd, name="deploy")
-cli.add_command(eval_cmd, name="eval")
 cli.add_command(mcp_cmd, name="mcp")
 cli.add_command(modal_app_cmd, name="modal-app")
 cli.add_command(opencode_cmd, name="opencode")
 cli.add_command(setup_cmd, name="setup")
 cli.add_command(task_app_cmd, name="task-app")
 cli.add_command(train_cfg_cmd, name="train-cfg")
-cli.add_command(usage_cmd, name="usage")
 
 
 # Register optional subcommands packaged under synth_ai.cli.*
-for _module_path in ("synth_ai.cli.commands.demo", "synth_ai.cli.commands.status", "synth_ai.cli.infra.turso"):
+for _module_path in ("synth_ai.cli.commands.demo", "synth_ai.cli.infra.turso"):
     module = _maybe_import(_module_path)
     if not module:
         continue
@@ -108,6 +99,9 @@ _maybe_call("synth_ai.cli.commands.help.core", "register", cli)
 # Register scan command
 _maybe_call("synth_ai.cli.commands.scan", "register", cli)
 
+# Register eval command
+_maybe_call("synth_ai.cli.commands.eval", "register", cli)
+
 # Train CLI lives under synth_ai.sdk.api.train
 _maybe_call("synth_ai.sdk.api.train", "register", cli)
 
@@ -136,6 +130,3 @@ _maybe_call("synth_ai.cli.utils.queue", "register", cli)
 
 # Artifacts commands
 _maybe_call("synth_ai.cli.commands.artifacts", "register", cli)
-
-# Research Agent commands
-_maybe_call("synth_ai.sdk.api.research_agent", "register", cli)
synth_ai/cli/commands/eval/__init__.py CHANGED
@@ -1,19 +1,10 @@
-from __future__ import annotations
-
-from .errors import EvalCliError
-from .validation import validate_eval_options
+"""Eval command package."""
 
-__all__ = [
-    "command",
-    "get_command",
-    "EvalCliError",
-    "validate_eval_options",
-]
+from __future__ import annotations
 
+def register(cli) -> None:
+    from synth_ai.cli.commands.eval.core import eval_command
+    cli.add_command(eval_command, name="eval")
 
-def __getattr__(name: str):
-    if name in {"command", "get_command"}:
-        from .core import command, get_command
 
-        return command if name == "command" else get_command
-    raise AttributeError(name)
+__all__ = ["register"]
synth_ai/cli/commands/eval/config.py ADDED
@@ -0,0 +1,338 @@
+"""Eval command configuration loading and normalization.
+
+This module handles loading and resolving evaluation configuration from:
+- TOML config files (legacy eval format or prompt_learning format)
+- Command-line arguments (override config values)
+- Environment variables (for API keys, etc.)
+
+**Config File Formats:**
+
+1. **Legacy Eval Format:**
+   ```toml
+   [eval]
+   app_id = "banking77"
+   url = "http://localhost:8103"
+   env_name = "banking77"
+   seeds = [0, 1, 2, 3, 4]
+
+   [eval.policy_config]
+   model = "gpt-4"
+   provider = "openai"
+   ```
+
+2. **Prompt Learning Format:**
+   ```toml
+   [prompt_learning]
+   task_app_id = "banking77"
+   task_app_url = "http://localhost:8103"
+
+   [prompt_learning.gepa]
+   env_name = "banking77"
+
+   [prompt_learning.gepa.evaluation]
+   seeds = [0, 1, 2, 3, 4]
+   ```
+
+**See Also:**
+- `synth_ai.cli.commands.eval.core.eval_command()`: CLI entry point
+- `synth_ai.cli.commands.eval.runner.run_eval()`: Uses resolved config
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from pathlib import Path
+from typing import Any, Literal
+
+from synth_ai.sdk.api.train.configs.prompt_learning import PromptLearningConfig
+from synth_ai.sdk.api.train.utils import load_toml
+from synth_ai.sdk.task.contracts import RolloutMode
+
+
+SeedSet = Literal["seeds", "validation_seeds", "test_pool"]
+
+
+@dataclass(slots=True)
+class EvalRunConfig:
+    """Configuration for evaluation runs.
+
+    This dataclass holds all configuration needed to execute an evaluation
+    against a task app. Values can come from TOML config files, CLI arguments,
+    or environment variables.
+
+    **Required Fields:**
+        app_id: Task app identifier
+        task_app_url: URL of running task app (or None to spawn locally)
+        seeds: List of seeds/indices to evaluate
+
+    **Optional Fields:**
+        env_name: Environment name (usually matches app_id)
+        policy_config: Model and provider configuration
+        backend_url: Backend URL for trace capture (enables backend mode)
+        concurrency: Number of parallel rollouts
+        return_trace: Whether to include traces in responses
+
+    **Example:**
+    ```python
+    config = EvalRunConfig(
+        app_id="banking77",
+        task_app_url="http://localhost:8103",
+        backend_url="http://localhost:8000",
+        env_name="banking77",
+        seeds=[0, 1, 2, 3, 4],
+        policy_config={"model": "gpt-4", "provider": "openai"},
+        concurrency=5,
+        return_trace=True,
+    )
+    ```
+    """
+    app_id: str
+    task_app_url: str | None
+    task_app_api_key: str | None
+    env_name: str | None
+    env_config: dict[str, Any] = field(default_factory=dict)
+    policy_name: str | None = None
+    policy_config: dict[str, Any] = field(default_factory=dict)
+    seeds: list[int] = field(default_factory=list)
+    ops: list[str] = field(default_factory=list)
+    mode: RolloutMode = RolloutMode.EVAL
+    return_trace: bool = False
+    trace_format: str = "compact"
+    concurrency: int = 1
+    metadata: dict[str, str] = field(default_factory=dict)
+    output_txt: Path | None = None
+    output_json: Path | None = None
+    verifier_config: dict[str, Any] | None = None
+    backend_url: str | None = None
+    backend_api_key: str | None = None
+    wait: bool = False
+    poll_interval: float = 5.0
+    traces_dir: Path | None = None
+    config_path: Path | None = None
+    timeout: float | None = None
+
+
+def load_eval_toml(path: Path) -> dict[str, Any]:
+    if not path.exists():
+        raise FileNotFoundError(f"Eval config not found: {path}")
+    return load_toml(path)
+
+
+def _select_seed_pool(
+    *,
+    seeds: list[int] | None,
+    validation_seeds: list[int] | None,
+    test_pool: list[int] | None,
+    seed_set: SeedSet,
+) -> list[int]:
+    if seed_set == "validation_seeds" and validation_seeds:
+        return validation_seeds
+    if seed_set == "test_pool" and test_pool:
+        return test_pool
+    if seeds:
+        return seeds
+    if validation_seeds:
+        return validation_seeds
+    if test_pool:
+        return test_pool
+    return []
+
+
+def _from_prompt_learning(
+    raw: dict[str, Any],
+    *,
+    seed_set: SeedSet,
+) -> EvalRunConfig:
+    pl_cfg = PromptLearningConfig.from_mapping(raw)
+    gepa = pl_cfg.gepa
+    mipro = pl_cfg.mipro
+
+    eval_cfg = gepa.evaluation if gepa else None
+    seeds = _select_seed_pool(
+        seeds=eval_cfg.seeds if eval_cfg else None,
+        validation_seeds=eval_cfg.validation_seeds if eval_cfg else None,
+        test_pool=eval_cfg.test_pool if eval_cfg else None,
+        seed_set=seed_set,
+    )
+
+    env_name = None
+    env_config: dict[str, Any] = {}
+    if gepa:
+        env_name = gepa.env_name
+        env_config = dict(gepa.env_config or {})
+    elif mipro:
+        env_name = mipro.env_name
+        env_config = dict(mipro.env_config or {})
+
+    policy_cfg: dict[str, Any] = {}
+    if pl_cfg.policy:
+        policy_cfg = {
+            "model": pl_cfg.policy.model,
+            "provider": pl_cfg.policy.provider,
+        }
+        if pl_cfg.policy.inference_url:
+            policy_cfg["inference_url"] = pl_cfg.policy.inference_url
+
+    app_id = pl_cfg.task_app_id or (env_name or "")
+    verifier_cfg = None
+    if pl_cfg.verifier:
+        if isinstance(pl_cfg.verifier, dict):
+            verifier_cfg = dict(pl_cfg.verifier)
+        else:
+            verifier_cfg = pl_cfg.verifier.model_dump(mode="python")
+
+    return EvalRunConfig(
+        app_id=app_id,
+        task_app_url=pl_cfg.task_app_url,
+        task_app_api_key=pl_cfg.task_app_api_key,
+        env_name=env_name,
+        env_config=env_config,
+        policy_name=pl_cfg.policy.policy_name if pl_cfg.policy else None,
+        policy_config=policy_cfg,
+        seeds=seeds,
+        ops=[],
+        concurrency=(gepa.rollout.max_concurrent if gepa and gepa.rollout else 1),
+        verifier_config=verifier_cfg,
+    )
+
+
+def _from_legacy_eval(raw: dict[str, Any]) -> EvalRunConfig:
+    eval_section = raw.get("eval", {})
+    if not isinstance(eval_section, dict):
+        eval_section = {}
+    app_id = str(eval_section.get("app_id") or "").strip()
+    model = str(eval_section.get("model") or "").strip()
+    policy_cfg = dict(eval_section.get("policy_config") or {})
+    if model and "model" not in policy_cfg:
+        policy_cfg["model"] = model
+    if "provider" not in policy_cfg and eval_section.get("provider"):
+        policy_cfg["provider"] = eval_section.get("provider")
+    return EvalRunConfig(
+        app_id=app_id,
+        task_app_url=eval_section.get("url") or eval_section.get("task_app_url"),
+        task_app_api_key=eval_section.get("task_app_api_key"),
+        env_name=eval_section.get("env_name"),
+        env_config=dict(eval_section.get("env_config") or {}),
+        policy_name=eval_section.get("policy_name"),
+        policy_config=policy_cfg,
+        seeds=list(eval_section.get("seeds") or []),
+        ops=list(eval_section.get("ops") or []),
+        return_trace=bool(eval_section.get("return_trace", False)),
+        trace_format=str(eval_section.get("trace_format") or "compact"),
+        concurrency=int(eval_section.get("concurrency") or 1),
+        metadata=dict(eval_section.get("metadata") or {}),
+    )
+
+
+def resolve_eval_config(
+    *,
+    config_path: Path | None,
+    cli_app_id: str | None,
+    cli_model: str | None,
+    cli_seeds: list[int] | None,
+    cli_url: str | None,
+    cli_env_file: str | None,
+    cli_ops: list[str] | None,
+    cli_return_trace: bool | None,
+    cli_concurrency: int | None,
+    cli_output_txt: Path | None,
+    cli_output_json: Path | None,
+    cli_backend_url: str | None,
+    cli_wait: bool,
+    cli_poll_interval: float | None,
+    cli_traces_dir: Path | None,
+    seed_set: SeedSet,
+    metadata: dict[str, str],
+) -> EvalRunConfig:
+    """Resolve evaluation configuration from multiple sources.
+
+    Loads configuration from TOML file (if provided) and merges with CLI arguments.
+    CLI arguments take precedence over config file values.
+
+    **Config File Formats:**
+    - Legacy eval format: `[eval]` section
+    - Prompt learning format: `[prompt_learning]` section
+
+    **Precedence Order:**
+    1. CLI arguments (highest priority)
+    2. Config file values
+    3. Default values
+
+    Args:
+        config_path: Path to TOML config file (optional)
+        cli_app_id: App ID from CLI (overrides config)
+        cli_model: Model name from CLI (overrides config)
+        cli_seeds: Seeds list from CLI (overrides config)
+        cli_url: Task app URL from CLI (overrides config)
+        cli_backend_url: Backend URL from CLI (overrides config)
+        cli_concurrency: Concurrency from CLI (overrides config)
+        seed_set: Which seed pool to use ("seeds", "validation_seeds", "test_pool")
+        metadata: Metadata key-value pairs for filtering
+
+    Returns:
+        Resolved EvalRunConfig with all values merged.
+
+    Raises:
+        FileNotFoundError: If config file is specified but doesn't exist.
+
+    Example:
+        ```python
+        config = resolve_eval_config(
+            config_path=Path("banking77_eval.toml"),
+            cli_app_id="banking77",
+            cli_seeds=[0, 1, 2],
+            cli_url="http://localhost:8103",
+            seed_set="seeds",
+            metadata={},
+        )
+        ```
+    """
+    raw: dict[str, Any] = {}
+    if config_path is not None:
+        raw = load_eval_toml(config_path)
+
+    if raw and ("prompt_learning" in raw or raw.get("algorithm") in {"gepa", "mipro"}):
+        resolved = _from_prompt_learning(raw, seed_set=seed_set)
+    else:
+        resolved = _from_legacy_eval(raw)
+
+    if cli_app_id:
+        resolved.app_id = cli_app_id
+    if cli_url:
+        resolved.task_app_url = cli_url
+    if cli_seeds:
+        resolved.seeds = cli_seeds
+    if cli_ops:
+        resolved.ops = cli_ops
+    if cli_return_trace is not None:
+        resolved.return_trace = cli_return_trace
+    if cli_concurrency is not None:
+        resolved.concurrency = cli_concurrency
+    if cli_output_txt is not None:
+        resolved.output_txt = cli_output_txt
+    if cli_output_json is not None:
+        resolved.output_json = cli_output_json
+    if cli_backend_url:
+        resolved.backend_url = cli_backend_url
+    if cli_wait:
+        resolved.wait = True
+    if cli_poll_interval is not None:
+        resolved.poll_interval = cli_poll_interval
+    if cli_traces_dir is not None:
+        resolved.traces_dir = cli_traces_dir
+
+    if cli_model:
+        resolved.policy_config["model"] = cli_model
+    if metadata:
+        resolved.metadata = metadata
+
+    if cli_env_file:
+        # Store in metadata for logging; env loading handled in core.
+        resolved.metadata.setdefault("env_file", cli_env_file)
+
+    resolved.config_path = config_path
+
+    return resolved
+
+
+__all__ = ["EvalRunConfig", "resolve_eval_config", "SeedSet"]