synth-ai 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of synth-ai might be problematic.
Files changed (153)
  1. synth_ai/__init__.py +13 -13
  2. synth_ai/cli/__init__.py +6 -15
  3. synth_ai/cli/commands/eval/__init__.py +6 -15
  4. synth_ai/cli/commands/eval/config.py +338 -0
  5. synth_ai/cli/commands/eval/core.py +236 -1091
  6. synth_ai/cli/commands/eval/runner.py +704 -0
  7. synth_ai/cli/commands/eval/validation.py +44 -117
  8. synth_ai/cli/commands/filter/core.py +7 -7
  9. synth_ai/cli/commands/filter/validation.py +2 -2
  10. synth_ai/cli/commands/smoke/core.py +7 -17
  11. synth_ai/cli/commands/status/__init__.py +1 -64
  12. synth_ai/cli/commands/status/client.py +50 -151
  13. synth_ai/cli/commands/status/config.py +3 -83
  14. synth_ai/cli/commands/status/errors.py +4 -13
  15. synth_ai/cli/commands/status/subcommands/__init__.py +2 -8
  16. synth_ai/cli/commands/status/subcommands/config.py +13 -0
  17. synth_ai/cli/commands/status/subcommands/files.py +18 -63
  18. synth_ai/cli/commands/status/subcommands/jobs.py +28 -311
  19. synth_ai/cli/commands/status/subcommands/models.py +18 -62
  20. synth_ai/cli/commands/status/subcommands/runs.py +16 -63
  21. synth_ai/cli/commands/status/subcommands/session.py +67 -172
  22. synth_ai/cli/commands/status/subcommands/summary.py +24 -32
  23. synth_ai/cli/commands/status/subcommands/utils.py +41 -0
  24. synth_ai/cli/commands/status/utils.py +16 -107
  25. synth_ai/cli/commands/train/__init__.py +18 -20
  26. synth_ai/cli/commands/train/errors.py +3 -3
  27. synth_ai/cli/commands/train/prompt_learning_validation.py +15 -16
  28. synth_ai/cli/commands/train/validation.py +7 -7
  29. synth_ai/cli/commands/train/{judge_schemas.py → verifier_schemas.py} +33 -34
  30. synth_ai/cli/commands/train/verifier_validation.py +235 -0
  31. synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +0 -1
  32. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +2 -6
  33. synth_ai/cli/demo_apps/math/config.toml +0 -1
  34. synth_ai/cli/demo_apps/math/modal_task_app.py +2 -6
  35. synth_ai/cli/demo_apps/mipro/task_app.py +25 -47
  36. synth_ai/cli/lib/apps/task_app.py +12 -13
  37. synth_ai/cli/lib/task_app_discovery.py +6 -6
  38. synth_ai/cli/lib/train_cfgs.py +10 -10
  39. synth_ai/cli/task_apps/__init__.py +11 -0
  40. synth_ai/cli/task_apps/commands.py +7 -15
  41. synth_ai/core/env.py +12 -1
  42. synth_ai/core/errors.py +1 -2
  43. synth_ai/core/integrations/cloudflare.py +209 -33
  44. synth_ai/core/tracing_v3/abstractions.py +46 -0
  45. synth_ai/data/__init__.py +3 -30
  46. synth_ai/data/enums.py +1 -20
  47. synth_ai/data/rewards.py +100 -3
  48. synth_ai/products/graph_evolve/__init__.py +1 -2
  49. synth_ai/products/graph_evolve/config.py +16 -16
  50. synth_ai/products/graph_evolve/converters/__init__.py +3 -3
  51. synth_ai/products/graph_evolve/converters/openai_sft.py +7 -7
  52. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +1 -1
  53. synth_ai/products/graph_gepa/__init__.py +23 -0
  54. synth_ai/products/graph_gepa/converters/__init__.py +19 -0
  55. synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
  56. synth_ai/sdk/__init__.py +45 -35
  57. synth_ai/sdk/api/eval/__init__.py +33 -0
  58. synth_ai/sdk/api/eval/job.py +732 -0
  59. synth_ai/sdk/api/research_agent/__init__.py +276 -66
  60. synth_ai/sdk/api/train/builders.py +181 -0
  61. synth_ai/sdk/api/train/cli.py +41 -33
  62. synth_ai/sdk/api/train/configs/__init__.py +6 -4
  63. synth_ai/sdk/api/train/configs/prompt_learning.py +127 -33
  64. synth_ai/sdk/api/train/configs/rl.py +264 -16
  65. synth_ai/sdk/api/train/configs/sft.py +165 -1
  66. synth_ai/sdk/api/train/graph_validators.py +12 -12
  67. synth_ai/sdk/api/train/graphgen.py +169 -51
  68. synth_ai/sdk/api/train/graphgen_models.py +95 -45
  69. synth_ai/sdk/api/train/local_api.py +10 -0
  70. synth_ai/sdk/api/train/pollers.py +36 -0
  71. synth_ai/sdk/api/train/prompt_learning.py +390 -60
  72. synth_ai/sdk/api/train/rl.py +41 -5
  73. synth_ai/sdk/api/train/sft.py +2 -0
  74. synth_ai/sdk/api/train/task_app.py +20 -0
  75. synth_ai/sdk/api/train/validators.py +17 -17
  76. synth_ai/sdk/graphs/completions.py +239 -33
  77. synth_ai/sdk/{judging/schemas.py → graphs/verifier_schemas.py} +23 -23
  78. synth_ai/sdk/learning/__init__.py +35 -5
  79. synth_ai/sdk/learning/context_learning_client.py +531 -0
  80. synth_ai/sdk/learning/context_learning_types.py +294 -0
  81. synth_ai/sdk/learning/prompt_learning_client.py +1 -1
  82. synth_ai/sdk/learning/prompt_learning_types.py +2 -1
  83. synth_ai/sdk/learning/rl/__init__.py +0 -4
  84. synth_ai/sdk/learning/rl/contracts.py +0 -4
  85. synth_ai/sdk/localapi/__init__.py +40 -0
  86. synth_ai/sdk/localapi/apps/__init__.py +28 -0
  87. synth_ai/sdk/localapi/client.py +10 -0
  88. synth_ai/sdk/localapi/contracts.py +10 -0
  89. synth_ai/sdk/localapi/helpers.py +519 -0
  90. synth_ai/sdk/localapi/rollouts.py +93 -0
  91. synth_ai/sdk/localapi/server.py +29 -0
  92. synth_ai/sdk/localapi/template.py +49 -0
  93. synth_ai/sdk/streaming/handlers.py +6 -6
  94. synth_ai/sdk/streaming/streamer.py +10 -6
  95. synth_ai/sdk/task/__init__.py +18 -5
  96. synth_ai/sdk/task/apps/__init__.py +37 -1
  97. synth_ai/sdk/task/client.py +9 -1
  98. synth_ai/sdk/task/config.py +6 -11
  99. synth_ai/sdk/task/contracts.py +137 -95
  100. synth_ai/sdk/task/in_process.py +32 -22
  101. synth_ai/sdk/task/in_process_runner.py +9 -4
  102. synth_ai/sdk/task/rubrics/__init__.py +2 -3
  103. synth_ai/sdk/task/rubrics/loaders.py +4 -4
  104. synth_ai/sdk/task/rubrics/strict.py +3 -4
  105. synth_ai/sdk/task/server.py +76 -16
  106. synth_ai/sdk/task/trace_correlation_helpers.py +190 -139
  107. synth_ai/sdk/task/validators.py +34 -49
  108. synth_ai/sdk/training/__init__.py +7 -16
  109. synth_ai/sdk/tunnels/__init__.py +118 -0
  110. synth_ai/sdk/tunnels/cleanup.py +83 -0
  111. synth_ai/sdk/tunnels/ports.py +120 -0
  112. synth_ai/sdk/tunnels/tunneled_api.py +363 -0
  113. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/METADATA +71 -4
  114. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/RECORD +118 -128
  115. synth_ai/cli/commands/baseline/__init__.py +0 -12
  116. synth_ai/cli/commands/baseline/core.py +0 -636
  117. synth_ai/cli/commands/baseline/list.py +0 -94
  118. synth_ai/cli/commands/eval/errors.py +0 -81
  119. synth_ai/cli/commands/status/formatters.py +0 -164
  120. synth_ai/cli/commands/status/subcommands/pricing.py +0 -23
  121. synth_ai/cli/commands/status/subcommands/usage.py +0 -203
  122. synth_ai/cli/commands/train/judge_validation.py +0 -305
  123. synth_ai/cli/usage.py +0 -159
  124. synth_ai/data/specs.py +0 -36
  125. synth_ai/sdk/api/research_agent/cli.py +0 -428
  126. synth_ai/sdk/api/research_agent/config.py +0 -357
  127. synth_ai/sdk/api/research_agent/job.py +0 -717
  128. synth_ai/sdk/baseline/__init__.py +0 -25
  129. synth_ai/sdk/baseline/config.py +0 -209
  130. synth_ai/sdk/baseline/discovery.py +0 -216
  131. synth_ai/sdk/baseline/execution.py +0 -154
  132. synth_ai/sdk/judging/__init__.py +0 -15
  133. synth_ai/sdk/judging/base.py +0 -24
  134. synth_ai/sdk/judging/client.py +0 -191
  135. synth_ai/sdk/judging/types.py +0 -42
  136. synth_ai/sdk/research_agent/__init__.py +0 -34
  137. synth_ai/sdk/research_agent/container_builder.py +0 -328
  138. synth_ai/sdk/research_agent/container_spec.py +0 -198
  139. synth_ai/sdk/research_agent/defaults.py +0 -34
  140. synth_ai/sdk/research_agent/results_collector.py +0 -69
  141. synth_ai/sdk/specs/__init__.py +0 -46
  142. synth_ai/sdk/specs/dataclasses.py +0 -149
  143. synth_ai/sdk/specs/loader.py +0 -144
  144. synth_ai/sdk/specs/serializer.py +0 -199
  145. synth_ai/sdk/specs/validation.py +0 -250
  146. synth_ai/sdk/tracing/__init__.py +0 -39
  147. synth_ai/sdk/usage/__init__.py +0 -37
  148. synth_ai/sdk/usage/client.py +0 -171
  149. synth_ai/sdk/usage/models.py +0 -261
  150. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
  151. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
  152. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
  153. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/top_level.txt +0 -0
@@ -38,7 +38,7 @@ from synth_ai.sdk.streaming import (
 )
 
 from .builders import build_prompt_learning_payload, build_rl_payload, build_sft_payload
-from .task_app import check_task_app_health
+from .local_api import check_local_api_health
 from .graphgen import GraphGenJob
 from .graphgen_models import load_graphgen_taskset
 from .context_learning import ContextLearningJob
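Note: `check_local_api_health` is a drop-in rename of `check_task_app_health`; the call sites later in this diff pass `(url, api_key)` plus an optional `timeout`. A minimal sketch of the new import, assuming the signature and the `.ok`/`.detail` result attributes visible at those call sites (URL and key values are placeholders):

```python
# Hedged sketch: the 0.4.4 import path follows the relative import above
# (synth_ai.sdk.api.train.local_api); attributes match the call sites below.
from synth_ai.sdk.api.train.local_api import check_local_api_health

health = check_local_api_health(
    "https://your-tunnel.trycloudflare.com",  # task app / local API URL (placeholder)
    "env-key-123",                            # ENVIRONMENT_API_KEY value (placeholder)
    timeout=10.0,
)
if not health.ok:
    raise RuntimeError(f"Health check failed: {health.detail}")
```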
@@ -465,23 +465,23 @@ _logger.debug("[TRAIN_MODULE] Module synth_ai.sdk.api.train.cli imported")
 @click.option(
     "--type",
     "train_type_override",
-    type=click.Choice(["prompt", "rl", "sft", "adas", "context_learning"]),
+    type=click.Choice(["prompt", "rl", "sft", "graphgen", "context_learning"]),
     default=None,
-    help="Explicitly set training type. Required for ADAS (uses JSON datasets).",
+    help="Explicitly set training type. Required for GraphGen (uses JSON datasets).",
 )
 @click.option(
     "--rollout-budget",
     "rollout_budget",
     type=int,
     default=None,
-    help="Rollout budget for ADAS optimization (default: 100)",
+    help="Rollout budget for GraphGen optimization (default: 100)",
 )
 @click.option(
     "--proposer-effort",
     "proposer_effort",
     type=click.Choice(["low", "medium", "high"]),
     default=None,
-    help="Proposer effort level for ADAS (default: medium)",
+    help="Proposer effort level for GraphGen (default: medium)",
 )
 def train_command(
     cfg_path: Path | None,
@@ -507,7 +507,7 @@ def train_command(
     proposer_effort: str | None,
 ) -> None:
 
-    """Interactive launcher for RL / SFT / Prompt Learning / ADAS / Context Learning jobs."""
+    """Interactive launcher for RL / SFT / Prompt Learning / GraphGen / Context Learning jobs."""
     import traceback
 
     ctx: dict[str, Any] = {
@@ -544,18 +544,18 @@ def train_command(
         load_dotenv(Path(env_file), override=True)
         click.echo(f"[TRAIN_CMD] Loaded explicit .env: {env_file}", err=True)
 
-    # Handle ADAS specially - it uses JSON datasets, not TOML configs
-    if train_type_override == "adas":
-        # For ADAS, dataset_path is required and cfg_path is ignored
+    # Handle GraphGen specially - it uses JSON datasets, not TOML configs
+    if train_type_override == "graphgen":
+        # For GraphGen, dataset_path is required and cfg_path is ignored
         if not dataset_path:
             raise click.ClickException(
-                "ADAS requires --dataset flag with path to JSON dataset file.\n"
-                "Usage: synth-ai train --type adas --dataset my_tasks.json"
+                "GraphGen requires --dataset flag with path to JSON dataset file.\n"
+                "Usage: synth-ai train --type graphgen --dataset my_tasks.json"
             )
-        train_type = "adas"
-        click.echo(f"[TRAIN_CMD] ADAS mode: using dataset {dataset_path}", err=True)
+        train_type = train_type_override
+        click.echo(f"[TRAIN_CMD] GraphGen mode: using dataset {dataset_path}", err=True)
     else:
-        # Non-ADAS: use TOML config
+        # Non-GraphGen: use TOML config
        if not cfg_path:
            available_cfgs = find_train_cfgs_in_cwd()
            if len(available_cfgs) == 1:
@@ -614,8 +614,8 @@ def train_command(
     if backend_base_url_env:
         click.echo(f" (from BACKEND_BASE_URL={backend_base_url_env})")
 
-    # Skip TOML-based validation for ADAS (uses JSON datasets)
-    if train_type != "adas" and cfg_path:
+    # Skip TOML-based validation for GraphGen (uses JSON datasets)
+    if train_type != "graphgen" and cfg_path:
         _validate_openai_key_if_provider_is_openai(cfg_path)
 
     match train_type:
@@ -681,12 +681,12 @@ def train_command(
                 stream_format=stream_format,
                 examples_limit=examples_limit,
             )
-        case "adas":
+        case "graphgen":
             if not dataset_path:
-                raise click.ClickException("ADAS requires a dataset path.")
-            adas_dataset_path = Path(dataset_path).expanduser().resolve()
-            handle_adas(
-                dataset_path=adas_dataset_path,
+                raise click.ClickException("GraphGen requires a dataset path.")
+            graphgen_dataset_path = Path(dataset_path).expanduser().resolve()
+            handle_graphgen(
+                dataset_path=graphgen_dataset_path,
                 backend_base=backend_base,
                 synth_key=synth_api_key,
                 policy_model=model,
@@ -930,7 +930,7 @@ def handle_rl(
     os.environ["ENVIRONMENT_API_KEY"] = env_key
 
     click.echo("Performing task app health check…")
-    health = check_task_app_health(build.task_url, env_key)
+    health = check_local_api_health(build.task_url, env_key)
     if not health.ok:
         click.echo(f"Task app health check failed: {health.detail}")
         raise click.ClickException("Aborting due to failing health check")
@@ -1169,7 +1169,7 @@ def handle_sft(
             limited_path.parent.rmdir()
 
 
-def handle_adas(
+def handle_graphgen(
     *,
     dataset_path: Path,
     backend_base: str,
@@ -1182,43 +1182,51 @@ def handle_adas(
     poll_interval: float,
     stream_format: str,
 ) -> None:
-    """Handle ADAS workflow optimization job creation and streaming.
+    """Handle GraphGen workflow optimization job creation and streaming.
 
-    ADAS uses JSON dataset files and auto-generates task apps.
+    GraphGen uses JSON dataset files and auto-generates task apps.
     """
     ctx: dict[str, Any] = {
         "dataset_path": str(dataset_path),
         "backend_base": backend_base,
         "poll": poll,
     }
-    log_info("handle_adas invoked", ctx=ctx)
+    log_info("handle_graphgen invoked", ctx=ctx)
 
     # Load dataset
-    click.echo(f"Loading ADAS dataset from: {dataset_path}")
+    click.echo(f"Loading GraphGen dataset from: {dataset_path}")
     try:
         dataset = load_graphgen_taskset(dataset_path)
     except FileNotFoundError:
         raise click.ClickException(f"Dataset file not found: {dataset_path}")
     except ValueError as e:
-        raise click.ClickException(f"Invalid ADAS dataset format: {e}")
+        raise click.ClickException(f"Invalid GraphGen dataset format: {e}")
+
+    problem_spec = None
+    try:
+        raw_dataset = json.loads(dataset_path.read_text())
+        problem_spec = raw_dataset.get("problem_spec") or raw_dataset.get("initial_prompt")
+    except Exception:
+        problem_spec = None
 
     click.echo(f"Dataset loaded: {dataset.metadata.name}")
     click.echo(f" Tasks: {len(dataset.tasks)}")
     click.echo(f" Gold outputs: {len(dataset.gold_outputs)}")
-    click.echo(f" Judge mode: {dataset.judge_config.mode}")
+    click.echo(f" Verifier mode: {dataset.verifier_config.mode}")
 
-    # Create ADAS job
+    # Create GraphGen job
     job = GraphGenJob.from_dataset(
         dataset=dataset,
         policy_model=policy_model or "gpt-4o-mini",
         rollout_budget=rollout_budget or 100,
         proposer_effort=proposer_effort or "medium",  # type: ignore
+        problem_spec=problem_spec,
         backend_url=backend_base,
        api_key=synth_key,
        auto_start=True,
     )
 
-    click.echo("\n=== Submitting ADAS Job ===")
+    click.echo("\n=== Submitting GraphGen Job ===")
     click.echo(f"Policy model: {job.config.policy_model}")
     click.echo(f"Rollout budget: {job.config.rollout_budget}")
     click.echo(f"Proposer effort: {job.config.proposer_effort}")
@@ -1229,7 +1237,7 @@ def handle_adas(
         raise click.ClickException(str(e))
 
     click.echo(f"\n✓ Job created:")
-    click.echo(f" ADAS Job ID: {result.graphgen_job_id}")
+    click.echo(f" GraphGen Job ID: {result.graphgen_job_id}")
     click.echo(f" Status: {result.status}")
 
     if not poll:
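The hunks above map directly onto the SDK surface. A hedged sketch of the programmatic equivalent of `synth-ai train --type graphgen`, using only names visible in this diff (`load_graphgen_taskset`, `GraphGenJob.from_dataset`, and the new `problem_spec` pass-through); import paths follow the relative imports in the first hunk, and values are placeholders:

```python
import json
from pathlib import Path

from synth_ai.sdk.api.train.graphgen import GraphGenJob
from synth_ai.sdk.api.train.graphgen_models import load_graphgen_taskset

dataset_path = Path("my_tasks.json")
dataset = load_graphgen_taskset(dataset_path)  # validates the JSON taskset

# Optional problem_spec, read the same way handle_graphgen does above.
raw = json.loads(dataset_path.read_text())
problem_spec = raw.get("problem_spec") or raw.get("initial_prompt")

job = GraphGenJob.from_dataset(
    dataset=dataset,
    policy_model="gpt-4o-mini",   # CLI default
    rollout_budget=100,           # CLI default
    proposer_effort="medium",     # CLI default
    problem_spec=problem_spec,
    backend_url="https://api.usesynth.ai",  # placeholder backend URL
    api_key="sk-synth-...",                 # placeholder API key
    auto_start=True,
)
# The CLI then submits the job and streams events; the submission method
# itself is outside the hunks shown here, so it is omitted.
```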
@@ -1979,7 +1987,7 @@ def handle_prompt_learning(
     click.echo("Performing task app health check…")
     click.echo(f"Task app URL: {build.task_url}")
     click.echo("⏳ Checking /health endpoint (timeout: 10s)...")
-    health = check_task_app_health(build.task_url, env_key, timeout=10.0)
+    health = check_local_api_health(build.task_url, env_key, timeout=10.0)
     if not health.ok:
         click.echo(f"❌ Task app health check failed: {health.detail}")
         click.echo(f" Health status: {health.health_status}")
@@ -6,12 +6,11 @@ from .prompt_learning import (
     MIPROConfig,
     PromptLearningConfig,
     PromptLearningPolicyConfig,
+    PromptLearningVerifierConfig,
     PromptPatternConfig,
 )
 from .rl import (
     EvaluationConfig,
-    JudgeConfig,
-    JudgeOptionsConfig,
     ModelConfig,
     RewardsConfig,
     RLConfig,
@@ -19,6 +18,8 @@ from .rl import (
     RLTrainingConfig,
     RolloutConfig,
     RubricConfig,
+    VerifierConfig,
+    VerifierOptionsConfig,
     WeightSyncConfig,
 )
 from .sft import (
@@ -40,8 +41,9 @@ __all__ = [
     "HyperparametersConfig",
     "HyperparametersParallelism",
     "JobConfig",
-    "JudgeConfig",
-    "JudgeOptionsConfig",
+    "PromptLearningVerifierConfig",
+    "VerifierConfig",
+    "VerifierOptionsConfig",
     "LoraConfig",
     "MIPROConfig",
     "MessagePatternConfig",
@@ -1,4 +1,40 @@
-"""Prompt Learning configuration models for MIPRO and GEPA."""
+"""Prompt Learning configuration models for MIPRO and GEPA.
+
+This module defines the configuration schema for prompt optimization jobs using:
+- **GEPA**: Genetic Evolution of Prompt Architectures - evolutionary optimization
+- **MIPRO**: Meta-learning with bootstrap phase and TPE optimization
+
+Example TOML configuration (GEPA):
+```toml
+[prompt_learning]
+algorithm = "gepa"
+task_app_url = "https://your-tunnel.trycloudflare.com"
+task_app_api_key = "$ENVIRONMENT_API_KEY"
+
+[prompt_learning.policy]
+model = "gpt-4o-mini"
+provider = "openai"
+
+[prompt_learning.gepa]
+env_name = "banking77"
+proposer_effort = "LOW"
+
+[prompt_learning.gepa.rollout]
+budget = 100
+max_concurrent = 20
+
+[prompt_learning.gepa.evaluation]
+seeds = {start = 0, end = 50}
+
+[prompt_learning.gepa.population]
+num_generations = 10
+children_per_generation = 5
+```
+
+See Also:
+    - Training reference: /training/gepa, /training/mipro
+    - Quickstart: /quickstart/prompt-optimization-gepa
+"""
 from __future__ import annotations
 
 from collections.abc import Mapping, Sequence
@@ -156,7 +192,7 @@ class MIPROSeedConfig(ExtraModel):
         return _parse_seeds(v) or []
 
 
-class PromptLearningJudgeConfig(ExtraModel):
+class PromptLearningVerifierConfig(ExtraModel):
     """Verifier configuration shared by GEPA and MIPRO.
 
     This configures LLM-based evaluation of agent trajectories during prompt optimization.
@@ -166,15 +202,13 @@ class PromptLearningJudgeConfig(ExtraModel):
         enabled: Whether to enable verifier-based scoring.
         reward_source: Source of the final reward for optimization.
             - "task_app": Use only environment rewards from task app (default).
-            - "judge": Use only verifier quality scores.
+            - "verifier": Use only verifier quality scores.
             - "fused": Weighted combination of environment and verifier rewards.
         backend_base: Base URL for the verifier service (e.g. "https://api.usesynth.ai").
         backend_api_key_env: Env var containing the Synth API key (default: "SYNTH_API_KEY").
         backend_provider: Provider for the verifier model (e.g. "openai", "groq").
         backend_model: Model used to execute the verifier rubric or graph (e.g. "gpt-4o-mini").
-        synth_verifier_id: ID or Name of a registered Verifier Graph or Rubric on the backend.
-            Use this to point to a specific, versioned verifier artifact.
-        backend_rubric_id: Legacy alias for synth_verifier_id.
+        verifier_graph_id: ID or name of a registered Verifier Graph on the backend.
         backend_event_enabled: Whether to enable fine-grained event-level scoring.
         backend_outcome_enabled: Whether to enable episode-level outcome scoring.
         weight_env: Weight for environment rewards in "fused" mode (default: 1.0).
@@ -182,13 +216,12 @@ class PromptLearningJudgeConfig(ExtraModel):
         weight_outcome: Weight for verifier outcome rewards in "fused" mode (default: 0.0).
     """
     enabled: bool = False
-    reward_source: Literal["task_app", "judge", "fused"] = "task_app"
+    reward_source: Literal["task_app", "verifier", "fused"] = "task_app"
     backend_base: str = ""
     backend_api_key_env: str = "SYNTH_API_KEY"
     backend_provider: str = ""
     backend_model: str = ""
-    synth_verifier_id: str = ""  # Preferred field for Registered VerifierGraph or Rubric ID
-    backend_rubric_id: str = ""  # Legacy alias for synth_verifier_id
+    verifier_graph_id: str = ""
     backend_event_enabled: bool = True
     backend_outcome_enabled: bool = True
     backend_options: Dict[str, Any] = Field(default_factory=dict)
@@ -201,21 +234,6 @@ class PromptLearningJudgeConfig(ExtraModel):
     spec_max_tokens: int = 5000
     spec_context: Optional[str] = None
 
-    @model_validator(mode="before")
-    @classmethod
-    def _sync_verifier_ids(cls, data: Any) -> Any:
-        """Sync synth_verifier_id and backend_rubric_id."""
-        if isinstance(data, dict):
-            if not data.get("synth_verifier_id") and data.get("backend_rubric_id"):
-                data["synth_verifier_id"] = data["backend_rubric_id"]
-            elif not data.get("backend_rubric_id") and data.get("synth_verifier_id"):
-                data["backend_rubric_id"] = data["synth_verifier_id"]
-        return data
-
-
-class PromptLearningVerifierConfig(PromptLearningJudgeConfig):
-    """Alias for PromptLearningJudgeConfig with verifier terminology."""
-
 
 class ProxyModelsConfig(ExtraModel):
     """Configuration for proxy usage on policy evaluations.
@@ -697,8 +715,8 @@ class MIPROConfig(ExtraModel):
     # Meta-update configuration
     meta_update: dict[str, Any] | None = None
 
-    # Judge configuration (shared with GEPA)
-    judge: PromptLearningJudgeConfig | dict[str, Any] | None = None
+    # Verifier configuration (shared with GEPA)
+    verifier: PromptLearningVerifierConfig | dict[str, Any] | None = None
 
     # Proxy models configuration (optional, can also be at top-level)
     proxy_models: ProxyModelsConfig | dict[str, Any] | None = None
@@ -1165,7 +1183,7 @@ class GEPAConfig(ExtraModel):
     population: GEPAPopulationConfig | None = None
     archive: GEPAArchiveConfig | None = None
     token: GEPATokenConfig | None = None
-    judge: PromptLearningJudgeConfig | dict[str, Any] | None = None
+    verifier: PromptLearningVerifierConfig | dict[str, Any] | None = None
     proxy_models: ProxyModelsConfig | dict[str, Any] | None = None  # Proxy models config (can be at top-level or gepa-specific)
     adaptive_pool: AdaptivePoolConfig | dict[str, Any] | None = None  # Adaptive pooling config
     adaptive_batch: GEPAAdaptiveBatchConfig | dict[str, Any] | None = None  # Adaptive batch config (GEPA only)
@@ -1407,7 +1425,7 @@ class GEPAConfig(ExtraModel):
         flat_data = {}
 
         for key, value in data.items():
-            if key in ("rollout", "evaluation", "mutation", "population", "archive", "token", "modules", "proxy_models", "adaptive_pool", "adaptive_batch", "judge"):
+            if key in ("rollout", "evaluation", "mutation", "population", "archive", "token", "modules", "proxy_models", "adaptive_pool", "adaptive_batch", "verifier"):
                 nested_data[key] = value
             else:
                 flat_data[key] = value
@@ -1483,7 +1501,83 @@
 
 
 class PromptLearningConfig(ExtraModel):
-    """Top-level prompt learning configuration."""
+    """Root configuration for Prompt Learning jobs (GEPA and MIPRO).
+
+    This is the top-level config loaded from a TOML file. Use `PromptLearningConfig.from_path()`
+    to load from a file, or `PromptLearningConfig.from_mapping()` to load from a dict.
+
+    Prompt learning optimizes prompts for a given task app and dataset using one of
+    two algorithms:
+    - **GEPA**: Genetic Evolution of Prompt Architectures - evolutionary optimization
+      with crossover, mutation, and selection across generations
+    - **MIPRO**: Meta-learning with bootstrap phase and Tree-structured Parzen Estimator
+      (TPE) optimization for hyperparameter tuning
+
+    Example:
+        ```python
+        from synth_ai.sdk.api.train.configs.prompt_learning import PromptLearningConfig
+
+        # Load from file
+        config = PromptLearningConfig.from_path("prompt_learning.toml")
+
+        # Or from dict
+        config = PromptLearningConfig.from_mapping({
+            "algorithm": "gepa",
+            "task_app_url": "https://your-tunnel.trycloudflare.com",
+            "gepa": {
+                "env_name": "banking77",
+                "policy": {"model": "gpt-4o-mini", "provider": "openai"},
+                "generations": 5,
+                "population_size": 4,
+            },
+        })
+        ```
+
+    Attributes:
+        algorithm: Optimization algorithm - "gepa" or "mipro".
+        task_app_url: URL of your task app (typically a Cloudflare tunnel URL).
+        task_app_api_key: API key for authenticating with the task app.
+            Defaults to ENVIRONMENT_API_KEY env var.
+        task_app_id: Optional identifier for the task app (for logging).
+        initial_prompt: Initial prompt pattern to seed optimization.
+        policy: Policy (LLM) configuration for rollouts.
+        mipro: MIPRO-specific configuration (if algorithm="mipro").
+        gepa: GEPA-specific configuration (if algorithm="gepa").
+        verifier: Optional verifier configuration for LLM-based reward scoring.
+        proxy_models: Proxy models configuration for cost-effective evaluation.
+        env_config: Additional environment configuration passed to task app.
+        free_tier: Enable free tier mode with cost-effective OSS models.
+
+    Returns:
+        After training completes, you receive a result dict:
+        ```python
+        {
+            "status": "succeeded",
+            "best_score": 0.92,
+            "best_snapshot_id": "snap_abc123",
+            "final_prompt": "You are a helpful assistant...",
+            "metrics": {
+                "generations_completed": 5,
+                "total_rollouts": 200,
+                "improvement": 0.15,
+            },
+        }
+        ```
+
+    Events:
+        During training, you'll receive streaming events:
+        - `prompt_learning.created` - Job created
+        - `prompt_learning.running` - Training started
+        - `prompt_learning.generation.started` - New generation began
+        - `prompt_learning.candidate.evaluated` - Candidate prompt evaluated
+        - `prompt_learning.generation.completed` - Generation finished with best score
+        - `prompt_learning.frontier.updated` - Pareto frontier updated (new best found)
+        - `prompt_learning.succeeded` / `prompt_learning.failed` - Terminal states
+
+    See Also:
+        - Training reference: /training/gepa, /training/mipro
+        - Quickstart: /quickstart/prompt-optimization-gepa
+    """
     algorithm: str  # "mipro" or "gepa"
     task_app_url: str
     task_app_api_key: str | None = None
@@ -1492,7 +1586,7 @@ class PromptLearningConfig(ExtraModel):
     policy: PromptLearningPolicyConfig | None = None
     mipro: MIPROConfig | None = None
     gepa: GEPAConfig | None = None
-    judge: PromptLearningJudgeConfig | dict[str, Any] | None = None
+    verifier: PromptLearningVerifierConfig | dict[str, Any] | None = None
     proxy_models: ProxyModelsConfig | dict[str, Any] | None = None  # Proxy models config (can be at top-level or algorithm-specific)
     env_config: dict[str, Any] | None = None
 
@@ -1665,8 +1759,8 @@ class PromptLearningConfig(ExtraModel):
             mipro_data["proxy_models"] = ProxyModelsConfig.model_validate(mipro_data["proxy_models"])
         # If proxy_models not specified, leave as None (defaults to disabled)
 
-        if "judge" in pl_data and isinstance(pl_data["judge"], dict):
-            pl_data["judge"] = PromptLearningJudgeConfig.model_validate(pl_data["judge"])
+        if "verifier" in pl_data and isinstance(pl_data["verifier"], dict):
+            pl_data["verifier"] = PromptLearningVerifierConfig.model_validate(pl_data["verifier"])
 
         return cls.model_validate(pl_data)
 
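The same rename applies to top-level TOML/dict keys: `from_mapping` now validates a `verifier` table where it previously looked for `judge`. A hedged sketch mirroring the docstring example above, with the GEPA sub-keys abbreviated:

```python
from synth_ai.sdk.api.train.configs.prompt_learning import PromptLearningConfig

config = PromptLearningConfig.from_mapping({
    "algorithm": "gepa",
    "task_app_url": "https://your-tunnel.trycloudflare.com",
    "gepa": {"env_name": "banking77"},
    # a [prompt_learning.judge] table in 0.4.1 becomes [prompt_learning.verifier]:
    "verifier": {"enabled": True, "reward_source": "verifier"},
})
```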
@@ -1696,7 +1790,7 @@ __all__ = [
     "PromptLearningConfig",
     "PromptLearningPolicyConfig",
     "PromptPatternConfig",
-    "PromptLearningJudgeConfig",
+    "PromptLearningVerifierConfig",
     "ProxyModelsConfig",
     "AdaptivePoolConfig",
     "AdaptiveCurriculumLevel",